<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i5e35239</article-id>
      <article-id pub-id-type="pmid">35639469</article-id>
      <article-id pub-id-type="doi">10.2196/35239</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Transformer- and Generative Adversarial Network–Based Inpatient Traditional Chinese Medicine Prescription Recommendation: Development Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Boie</surname>
            <given-names>Sebastian Daniel</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Hong</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9401-8409</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Jiajun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1244-4701</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Ni</surname>
            <given-names>Wandong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Physician Qualification Program</institution>
            <institution>Certification Center of Traditional Chinese Medicine</institution>
            <institution>State Administration of Traditional Chinese Medicine</institution>
            <addr-line>No 5 Beixian Ge Road</addr-line>
            <addr-line>Xicheng District</addr-line>
            <addr-line>Beijing, 100053</addr-line>
            <country>China</country>
            <phone>86 13311127900</phone>
            <email>2592967878@qq.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5488-701X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>Youlin</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1488-5914</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Kunjing</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2160-639X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Daying</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7193-9950</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Jing</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6620-3463</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Guanganmen Hospital</institution>
        <institution>China Academy of Chinese Medical Sciences</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Electronic Information Engineering</institution>
        <institution>Wuxi University</institution>
        <addr-line>Wuxi</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Physician Qualification Program</institution>
        <institution>Certification Center of Traditional Chinese Medicine</institution>
        <institution>State Administration of Traditional Chinese Medicine</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>School of Electronic Engineering and Optoelectronic Technology</institution>
        <institution>Nanjing University of Science and Technology</institution>
        <addr-line>Nanjing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Wandong Ni <email>2592967878@qq.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>5</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>5</issue>
      <elocation-id>e35239</elocation-id>
      <history>
        <date date-type="received">
          <day>27</day>
          <month>11</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>1</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>5</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>11</day>
          <month>4</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Hong Zhang, Jiajun Zhang, Wandong Ni, Youlin Jiang, Kunjing Liu, Daying Sun, Jing Li. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 31.05.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/5/e35239/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Traditional Chinese medicine (TCM) practitioners usually follow a 4-step evaluation process during patient diagnosis: observation; auscultation and olfaction; inquiry; and pulse feeling and palpation. The information gathered in this process, along with laboratory test results and other measurements such as vital signs, is recorded in the patient’s electronic health record (EHR). In fact, all the information needed to make a treatment plan is contained in the EHR; however, only a seasoned TCM physician could use this information well to make a good treatment plan as the reasoning process is very complicated, and it takes years of practice for a medical graduate to master the reasoning skill. In this digital medicine era, with a deluge of medical data, ever-increasing computing power, and more advanced artificial neural network models, it is not only desirable but also readily possible for a computerized system to mimic the decision-making process of a TCM physician.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop an assistive tool that can predict prescriptions for inpatients in a hospital based on patients’ clinical EHRs.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Clinical health records containing medical histories, as well as current symptoms and diagnosis information, were used to train a transformer-based neural network model using the corresponding physician’s prescriptions as the target. This was accomplished by extracting relevant information, such as the patient’s current illness, medicines taken, nursing care given, vital signs, examinations, and laboratory results from the patient’s EHRs. The obtained information was then sorted chronologically to produce a sequence of data for the patient. These time sequence data were then used as input to a modified transformer network, which was chosen as a prescription prediction model. The output of the model was the prescription for the patient. The ultimate goal is for this tool to generate a prescription that matches what an expert TCM physician would prescribe. To alleviate the issue of overfitting, a generative adversarial network was used to augment the training sample data set by generating noise-added samples from the original training samples.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In total, 21,295 copies of inpatient electronic medical records from Guang’anmen Hospital were used in this study. These records were generated between January 2017 and December 2018, covering 6352 types of medicines. These medicines were sorted into 819 types of first-category medicines based on their class relationships. As shown by the test results, the performance of a fully trained transformer model can have an average precision rate of 80.58% and an average recall rate of 68.49%.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>As shown by the preliminary test results, the transformer-based TCM prescription recommendation model outperformed the existing conventional methods. The extra training samples generated by the generative adversarial network help to overcome the overfitting issue, leading to further improved recall and precision rates.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>traditional Chinese medicine</kwd>
        <kwd>transformer</kwd>
        <kwd>generative adversarial networks</kwd>
        <kwd>electronic health records</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>natural language processing</kwd>
        <kwd>machine learning</kwd>
        <kwd>word2vec</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The widespread use of electronic health record (EHR) systems has led to the explosive growth of digitized health care data. As the amount and complexity of data grow, medical analysis and decision-making become increasingly time-consuming and error prone. In reality, a human physician cannot fully use all the available information at his or her disposal in a timely fashion. Therefore, harnessing the information contained in EHR data, most of which is in textual form, is critical for driving innovation research, improving health care quality, and reducing costs. Natural language processing (NLP) is essential for transforming relevant information sequestered in freestyle texts into structured data for further computerized processing. The development of a predictive model with EHR data was motivated by the desire to offer a medication-oriented decision support tool to clinical health care providers. To build such a predictive model, we used NLP techniques to convert a patient’s EHR data into a representation, which then becomes the input to a deep learning model to predict medical events, such as medication orders.</p>
      <p>Biomedical NLP has experienced great progress in the past 30 years [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>] and has become especially active in recent years [<xref ref-type="bibr" rid="ref3">3</xref>]. Previously, EHR data were analyzed using traditional machine learning and statistical techniques such as logistic regression, support vector machine, and random forest [<xref ref-type="bibr" rid="ref4">4</xref>]. However, in recent years, as reviewed in the studies by Shickel et al [<xref ref-type="bibr" rid="ref5">5</xref>], Sheikhalishahi et al [<xref ref-type="bibr" rid="ref6">6</xref>], and Miotto et al [<xref ref-type="bibr" rid="ref7">7</xref>], many research efforts have been devoted to the application of deep learning techniques to EHR data for clinical informatics tasks. Autoencoders have been used by researchers [<xref ref-type="bibr" rid="ref8">8</xref>] to predict a specific set of diagnoses. A long short-term memory (LSTM) sequence model [<xref ref-type="bibr" rid="ref9">9</xref>] was trained to provide patient-specific and time-specific predictions of medication orders for patients who are hospitalized [<xref ref-type="bibr" rid="ref10">10</xref>]. A convolutional neural network (CNN) model was used to predict discharge medications using the information available at admission [<xref ref-type="bibr" rid="ref11">11</xref>]. Numerous articles were surveyed in the study by Goldstein et al [<xref ref-type="bibr" rid="ref12">12</xref>] regarding the development of a risk prediction model using EHR data. A comprehensive study on applying deep learning techniques to EHR data for a variety of prediction problems was reported in the study by Rajkomar et al [<xref ref-type="bibr" rid="ref13">13</xref>]. Recurrent neural networks were successfully trained using EHR data to detect medical events [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      <p>The research on applying artificial intelligence in traditional Chinese medicine (TCM) has been very active in the past decade [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Data mining techniques have been used for TCM syndrome modeling and prescription recommendation for diabetes [<xref ref-type="bibr" rid="ref19">19</xref>]. The PageRank algorithm [<xref ref-type="bibr" rid="ref20">20</xref>] was modified and applied to TCM prescription recommendations [<xref ref-type="bibr" rid="ref21">21</xref>]. In our previous work [<xref ref-type="bibr" rid="ref17">17</xref>], a CNN was used to predict TCM diseases, and XGBoost, along with other neural networks, was used to predict TCM syndromes. Following the sequence-to-sequence paradigm, researchers from Peking University used bidirectional gated recurrent neural networks to generate TCM prescriptions from symptom descriptions [<xref ref-type="bibr" rid="ref22">22</xref>]. They proposed a coverage mechanism along with a soft loss function as a remedy for the repetition problem they encountered. However, the requirement of curated descriptions of symptoms as inputs hinders the practicality of this approach. Ideally, the model generates TCM prescriptions directly from raw EHR data, similar to how a human TCM physician conducts deductive reasoning.</p>
      <p>Generating prescriptions from raw EHR data typically comprises 2 parts. The first part uses biomedical NLP [<xref ref-type="bibr" rid="ref3">3</xref>] techniques to extract relevant information used by a human physician to form a feature representation [<xref ref-type="bibr" rid="ref23">23</xref>]. The second part uses deep learning techniques [<xref ref-type="bibr" rid="ref7">7</xref>] to map this feature representation into a prescription order.</p>
      <p>The primary task of biomedical NLP is to extract relevant information from clinical narratives written in free-form text and store the gathered information as structured data. Numerous deep learning techniques [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>], such as bidirectional LSTM (BiLSTM), have been used in the biomedical NLP field. Both BiLSTM conditional random field (CRF) and transformer CRF have been used for named entity recognition (NER) of EHR notes written in Chinese [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. The recognized entities are then formed into distinct tokens. Then, the feature representation of a patient’s EHR data becomes a sequence of tokens. The tokens are then converted into real-valued multidimensional vectors using word embedding techniques [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
      <p>The purpose of this study was to develop an assistive tool that can prescribe TCM prescriptions for inpatients in a hospital based on the patient’s clinical EHRs. The predictive model for TCM prescription generation is based on a sequence-transducing model called the transformer [<xref ref-type="bibr" rid="ref30">30</xref>]. This model is entirely based on attention, replacing the recurrent layers most commonly used in encoder-decoder architectures with multihead self-attention. The training used in this predictive model was supervised training with human-authored prescriptions contained in the EHR data set as the training targets. Furthermore, a generative adversarial network (GAN) [<xref ref-type="bibr" rid="ref31">31</xref>] model was designed to augment the training set to further enhance the overall system performance by reducing the effects of overfitting.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>This section is arranged as follows: the overall system architecture is briefly described; then, each constituent subsystem, which may comprise some functional blocks, is introduced; finally, the training process is described in the <italic>Training</italic> subsection, where a GAN model was used to generate noise-added samples from the original samples.</p>
      <sec>
        <title>System Overview</title>
        <p>Hospitals and medical institutes in China are rapidly moving toward standardizing their EHRs to conform to the regulations and specifications issued by the Ministry of Health of the People’s Republic of China [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>]. A standard EHR document for a patient may contain up to 53 parts, depending on the patient’s situation. These may include the following:</p>
        <list list-type="bullet">
          <list-item>
            <p>A first page record containing the patient’s basic personal information, such as sex, age, occupation, and marital status</p>
          </list-item>
          <list-item>
            <p>An admission record containing the description of a patient’s illness upon admission to the hospital, including chief complaints, medical history, and family medical history</p>
          </list-item>
          <list-item>
            <p>A laboratory tests record containing the list of tests and the corresponding results</p>
          </list-item>
          <list-item>
            <p>A nursing record containing nurse notes of the patient’s condition, treatments taken and nursing care taken, body temperatures and vital signs taken, and physician’s orders</p>
          </list-item>
          <list-item>
            <p>A treatment procedure record containing the entire in-hospital diagnosis and treatment process and any changes to the patient’s illness or illnesses</p>
          </list-item>
        </list>
        <p>A high-level block diagram of the proposed system is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The system comprises 4 subsystems: the NLP subsystem, the feature extraction subsystem, the vectorization subsystem, and the prescription prediction subsystem. The NLP subsystem processes the EHR file and produces structured data, which in turn are processed by the feature extraction subsystem to extract relevant clinical information for prescription prediction. The vectorization subsystem maps the sequence of tokens written in Chinese characters to digital numbers, presented as a vector in a multidimensional space. The prescription prediction subsystem, which is a transformer-based deep learning model, automatically generates a prescription based on input vector data. Together, the first 3 subsystems accomplish the task of extracting relevant information from an EHR file to form input variables for the prediction model. Similar representation learning operations were described in our previous paper [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>In short, NLP normalizes the raw EHR data, the feature extractor converts the normalized data into a sequence of tokens, the vectorization subsystem maps the tokens into vectors of real numbers, and the predictive model performs the reasoning process to produce a prescription.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Block diagram of the prescription generation system. EHR: electronic health record; NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e35239_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>The NLP Subsystem</title>
        <p>This subsystem is responsible for generating structured data from original EHR documents. The internal block diagram of the subsystem is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. There are 3 functional blocks in this subsystem: the preprocessing block, NER block, and British Medical Journal block.</p>
        <p>The preprocessing block cleans the raw EHR document by removing pictures and unusable components. This ensures the completeness and accuracy of the electronic medical records. Electronic medical records with incomplete or inconsistent information are discarded.</p>
        <p>After the initial cleaning, the content of the EHR file is then divided into distinct sections. For example, the admission record is divided into sections of chief complaints, medical history, and others. Then, all the resultant sections are sorted, formatted, and subsequently fed to the NER block.</p>
        <p>Only a small part of the EHR document is in a fixed format, and the remainder is in unstructured freestyle narratives. For fixed-format texts, a script is used to extract named entities to form structured data.</p>
        <p>For freestyle narratives, a functional block called entity recognition is used to extract named entities to form structured data entries. The NER block is implemented using a BiLSTM network with CRF (BiLSTM-CRF) [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        <p>Then, the extracted named entities such as symptoms, illness, medicine, examinations, and tests are further standardized according to a Chinese version of the British Medical Journal Best Practice knowledge base.</p>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows an example of the processing result, where the admission record of a raw EHR note is converted into structured data, with the marked words being named entities.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Block diagram of the named entity recognition subsystem. BiLSTM: bidirectional long short-term memory; EMR: electronic medical record; BiLSTM-CRF: bidirectional long short-term memory–conditional random field; BMJ: British Medical Journal.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e35239_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Example of converting a freestyle narrative into structured data. EHR: electronic health record.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e35239_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>The Feature Extraction Subsystem</title>
        <p>To effectively mimic the reasoning process conducted by a human physician, accurate and relevant input variables must be chosen properly. These variables should represent the complete set of factors that a human physician should take into consideration when making treatment decisions. <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> summarizes the predominant factors that TCM experts consider when making treatment decisions.</p>
        <p>The feature extraction subsystem extracts the aforementioned key features from the standardized structured data to form a sequence of tokens. <xref rid="figure4" ref-type="fig">Figure 4</xref> shows an example of this feature extraction, in which a sequence of tokens is generated from structured data.</p>
        <boxed-text id="box1" position="float">
          <title>Text type and the content to extract.</title>
          <p>
            <bold>Demography</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Sex, age, height, weight, and BMI</p>
            </list-item>
          </list>
          <p>
            <bold>Chief complaints</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Symptoms and signs</p>
            </list-item>
          </list>
          <p>
            <bold>Recent medical history</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Symptoms, signs, and general information</p>
            </list-item>
          </list>
          <p>
            <bold>Past medical history</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Past illness and medicines taken</p>
            </list-item>
          </list>
          <p>
            <bold>Present illness</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Tongue coating and pulses</p>
            </list-item>
          </list>
          <p>
            <bold>Body check</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Vital signs</p>
            </list-item>
          </list>
          <p>
            <bold>Treatment process records</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Current illness situation and treatment plan</p>
            </list-item>
          </list>
          <p>
            <bold>Physician’s orders</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Prescriptions</p>
            </list-item>
          </list>
          <p>
            <bold>Nursing notes</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Vital signs and medication records</p>
            </list-item>
          </list>
          <p>
            <bold>Examination reports</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Examination items and findings</p>
            </list-item>
          </list>
          <p>
            <bold>Laboratory reports</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Items tested and qualitative and quantitative test results</p>
            </list-item>
          </list>
        </boxed-text>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Example of converting structured data into a sequence of tokens.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e35239_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>The Vectorization Subsystem</title>
        <sec>
          <title>Overview</title>
          <p>Until this point, all medical information needed to make a treatment decision was encapsulated in textual data expressed in Chinese characters. To be used by the deep learning network—the transformer—the information must be mapped into a digital variable. In this vectorization process, a Chinese word or phrase is represented as a real-valued vector in multidimensional feature space. This section explains how tokenized features are further processed through word embedding.</p>
        </sec>
        <sec>
          <title>Training the Word Embedding Model</title>
          <p>The corpus was a collection of 102,596 electronic medical records from Guang’anmen Hospital and other hospitals. The <italic>Jieba</italic> tokenizer was used to perform tokenization. The open-source modeling tool <italic>Gensim</italic> was used to train the word2vec [<xref ref-type="bibr" rid="ref29">29</xref>] model with the following major parameters: <italic>min_count</italic>=2, <italic>vector_size</italic>=100, <italic>window</italic>=5, <italic>sg</italic>=1, <italic>hs</italic>=1, and <italic>epochs</italic>=50.</p>
          <p>The Skip-Gram model was used, as indicated by the parameters. Each word was represented by a real-valued vector of 100 dimensions.</p>
        </sec>
        <sec>
          <title>Vectorization</title>
          <p>Once the word embedding model is trained, each token is represented by a 100-dimension vector. Each word in the input sequence is assigned a unique identifier; a numerical-type value, expressed as a name-value-unit triple, is assigned its own unique identifier. Once all tokens are converted into vectors, the vectors are then concatenated to form a single vector variable, which then serves as the input to the transformer.</p>
          <p>The NLP, feature extraction, and vectorization subsystems together accomplish the task of feature learning by converting an EHR document into a multidimensional real-valued vector. <xref rid="figure5" ref-type="fig">Figure 5</xref> shows an example of mapping from EHR text to word vectors.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Illustration of converting electronic health record text to word vectors.</p>
            </caption>
            <graphic xlink:href="medinform_v10i5e35239_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>The Transformer Subsystem</title>
        <p>The transformer subsystem is responsible for recommending a prescription for every given input embedding, as shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>. The subsystem is described in the following paragraphs.</p>
        <p>Input embedding is a vector of <italic>max_num_tokens× vector_size</italic> dimensions. For example, <italic>max_num_tokens</italic>=759 and <italic>vector_size</italic>=100<italic>.</italic> Zero padding is used if the number of tokens in a sequence is smaller than <italic>max_num_tokens.</italic> Conversely, if the number of tokens in a sequence is larger than <italic>max_num_tokens</italic>, the number of tokens is capped at <italic>max_num_tokens</italic> by dropping off tokens corresponding to the oldest time stamp with respect to the current prescription generation time. The input embedding sample is first added to the position vector of the same size, becoming the input to the first encoder.</p>
        <p>The main body of the subsystem comprises 2 identical cascaded transformer encoders. Unlike the encoder of the original transformer [<xref ref-type="bibr" rid="ref30">30</xref>], which comprises 6 identical layers, the encoder used in this research had only 1 layer with 4 sublayers. The first was a multihead self-attention layer with <italic>Multi_heads</italic>=4 and <italic>head_dim</italic>=8. The second was a residual layer of 100 neurons with normalization. The third was a simple, position-wise, fully connected feedforward network of 2048 neurons. The fourth was a residual layer of 100 neurons with normalization.</p>
        <p>The second encoder was followed by a linear layer, a feedforward layer of 2048 neurons, a hidden layer, and an output layer, as shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>. The output layer comprised 819 neurons with a sigmoid activation function. Each of the 819 neurons corresponded to an herbal ingredient. The hidden layer comprised 128 neurons with a dropout mechanism and normalization. The dropout rate was set to 0.4740. The purpose of this hidden layer was to prevent overfitting.</p>
        <p>The final result from the output layer was a list of probabilities for the 819 drug ingredients, valued between 0 and 1. The recommended prescription was then obtained by setting a threshold for these probabilities.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>The transformer subsystem.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e35239_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Training</title>
        <sec>
          <title>Training the Transformer</title>
          <p>Training of the transformer is a supervised learning process. The input is a real-valued vector representation of a patient’s EHR, and the output is the prescription. The learning goal is for a machine-generated prescription to match the medical order prescribed by a human physician.</p>
        </sec>
        <sec>
          <title>Augmenting the Training Data</title>
          <p>To alleviate the overfitting effect of the proposed prediction model, a GAN [<xref ref-type="bibr" rid="ref31">31</xref>] network was used to augment the training data set. Following the fundamental idea of the GAN network, the generative model G is trained to represent the distribution of the original training data set, and the discriminative model D is trained to detect whether the sample originates from the original sample set or from the output of the generative model.</p>
          <p>During the training phase, the entire system looks like that shown in <xref rid="figure7" ref-type="fig">Figure 7</xref>. For every original training sample, there is a noise-added sample. The use of a GAN in this system effectively doubled the number of training samples.</p>
          <p>The internal structure of our GAN network was designed as shown in <xref rid="figure8" ref-type="fig">Figure 8</xref>. Generator G comprises 2 identical LSTM layers, each with a size of 279. Each LSTM layer is followed by a normalization layer with a residual connection. The input to the discriminator D could be either an original word embedding sample or a noise-added sample generated by the generator G. The discriminator D comprises an LSTM layer with a size of 279, a residual and normalization layer with a size of 100, and a full connection layer with a size of 256. Finally, the discriminator D outputs a binary value using a sigmoid function.</p>
          <p>We followed a typical GAN network training procedure [<xref ref-type="bibr" rid="ref31">31</xref>] to train the GAN subsystem, simultaneously training the discriminator and generator. The discriminator and generator alternate in their training until a Nash equilibrium is reached.</p>
          <p>The generator first produces a <italic>batch_size</italic> noise-added EHR, embedding samples with randomly initialized coefficients of the generator network. These samples are concatenated with the original noise-free EHR embedding samples to form (2×<italic>batch_size</italic>) embedding samples, each with <italic>max_num_tokens×vector_size</italic> real values. For example, we can have <italic>batch_size</italic>=500, <italic>max_num_tokens</italic>=560, <italic>vector_size</italic>=100. These (2×<italic>batch_size</italic>) samples were used as inputs to the discriminator. For every input sample, an output label indicates whether the sample is from the true original embedding or from the generator. The discriminator network was trained using a backpropagation algorithm with the objective of minimizing the prediction error. The training of the discriminator is halted when the binary cross-entropy loss function stops decreasing. The discriminator training is then temporarily halted to yield to the generator training.</p>
          <p>To train the generator, all network coefficients of the discriminator must be frozen. The discriminator now works in tandem with the generator during generator training. The generator produces <italic>batch_size</italic> noise-added embedding samples, and for every sample, the discriminator outputs a prediction. The generator updates its parameters using a backpropagation algorithm based on the discriminator output. The training of the generator is halted when the binary cross-entropy loss function stops increasing. The generator training is then temporarily halted to yield to the discriminator training.</p>
          <p>The aforementioned discriminator and generator training processes together form 1 training epoch. The entire GAN network training is accomplished through several epochs. The training stops when a Nash equilibrium is reached.</p>
          <p>The entire training process is illustrated using the Python pseudocode included in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <fig id="figure7" position="float">
            <label>Figure 7</label>
            <caption>
              <p>Block diagram of the predictive modeling system during the training phase. EHR: electronic health record; GAN: generative adversarial network; NLP: natural language processing.</p>
            </caption>
            <graphic xlink:href="medinform_v10i5e35239_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure8" position="float">
            <label>Figure 8</label>
            <caption>
              <p>The internal structure of the generative adversarial network subsystem. LSTM: long short-term memory; *size of the neural network used in that layer.</p>
            </caption>
            <graphic xlink:href="medinform_v10i5e35239_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>This study received institutional review board approval from the Guang’anmen Hospital Ethics Committee (SQ2017YFGX 060073).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Set</title>
        <p>EHRs generated in Guang’anmen Hospital between January 1, 2017, and December 31, 2018, were used as the data set in this study. Initially, there were 27,846 copies of EHR notes, out of which 6551 (23.53%) copies were discarded because of quality control. An EHR note should be discarded if it satisfies one of the following conditions:</p>
        <list list-type="bullet">
          <list-item>
            <p>The note is incomplete because certain basic pages are missing.</p>
          </list-item>
          <list-item>
            <p>The note contains inconsistent information.</p>
          </list-item>
          <list-item>
            <p>The note does not use standard descriptions.</p>
          </list-item>
          <list-item>
            <p>The note contains special EHR circumstances such as chemotherapy, after an operation, and removal of fracture settings.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <p>The data set contained 6352 drug varieties. A complete TCM prescription includes drug ingredients, dosages, and decoction preparation instructions. It is still very challenging, if not impossible, for a machine to generate such a complete TCM prescription. At our current stage of research, we focus only on the drug ingredients of a prescription.</p>
        <p>Judging whether the 2 TCM prescriptions are the same is often not straightforward, given the distinctive nature of TCM [<xref ref-type="bibr" rid="ref35">35</xref>]. Often, 2 different herbs may have the same medical effect. When a TCM physician prescribes a medication order, he or she often has multiple choices at hand for herbal ingredients. As a result, the 2 TCM physicians may prescribe different herbs for the same patient with the same diagnosed condition. Therefore, it is necessary to have a unified method of evaluating machine-generated prescriptions. To this end, we need a higher level of abstraction. <xref rid="figure9" ref-type="fig">Figure 9</xref> shows an example of the organization of TCM drugs. In this example, 2 TCM drugs (antiphlogistic powder and Jingfang decoction) have different herbal ingredients but belong to the same parent drug category and have the same medical treatment effect. In our research, we concluded that the recommended drug should be considered a correct recommendation as long as the recommended drug belongs to the same parent category as that of the human-authored prescription.</p>
        <p>To quantitatively evaluate the performance of the transformer-based deep learning model, we compared the prescription generated by the machine with that prescribed by a human physician. Here, we used the metrics of <italic>precision rate</italic> and <italic>recall rate</italic>, which we based on 3 variables. True positive (TP) is defined as the number of drugs that exist in the physician’s prescription and also exist in the machine’s prescription. False positive (FP) is the number of drugs that do not exist in the physician’s prescription but exist in the machine’s prescription. False negative (FN) is defined as the number of drugs that exist in the physician’s prescription but not in the machine’s prescription. With these definitions, we defined the precision and recall rates as follows:</p>
        <disp-formula>Precision rate = TP / (TP + FP) <bold>(1)</bold></disp-formula>
        <disp-formula>Recall rate = TP / (TP + FN) <bold>(2)</bold></disp-formula>
        <fig id="figure9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>Classification of herbal drugs.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e35239_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Hyperparameter Tuning With GridSearchCV</title>
        <p>The data set was divided into training and test sets, with the training set comprising 90% of the data set and the test set comprising the rest. The model was trained using a 10-fold cross-validation method; that is, the training set was randomly split into 10 folds, with the model being trained 10 times. During each of the 10 training times, the hyperparameters were tuned using the GridSearchCV method. Each training resulted in a set of hyperparameters, with the ultimate hyperparameters being the average of these 10 sets of parameters.</p>
        <p>The values of the hyperparameters of the transformer network model have a great influence on the accuracy of the model. The optimal values of these parameters were determined through iterations using the grid search method. The sparse characters of each type were embedded into a d-dimensional embedding layer. Then, all vectors were combined using a new method: vectors of the same type and time were averaged using the weights of self-learning.</p>
        <p>The model was optimized using a minimal log loss. Many regularization methods were used, such as the vector loss rate and the embedded layer loss rate. In addition, small-scale L2 weight punishment was used, which increased the punishment for large weights. The training batch size was chosen as 128, placing sentences with similar sizes into the same batch. Each batch contained approximately 12,000 words. Finally, the multilabel task was processed using an Adam function. For multilabel tasks, the input with the last time stamp was multiplied with the special end of sequence embedding. The training was executed using the Keras framework on a server with 8 NVIDIA P100 graphics processing units. The fine-tuned hyperparameters along with their respective ranges are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Some hyperparameters of the model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="260"/>
            <col width="340"/>
            <thead>
              <tr valign="top">
                <td>Hyperparameters</td>
                <td>Values</td>
                <td>Parameter range</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Gradient</td>
                <td>0.1245</td>
                <td>(0.1, 0.5, 1.0, 1.1)</td>
              </tr>
              <tr valign="top">
                <td>Attention heads</td>
                <td>4</td>
                <td>(4, 8)</td>
              </tr>
              <tr valign="top">
                <td>Vector loss rate</td>
                <td>0.4410</td>
                <td>(0.25, 0.35, 0.5)</td>
              </tr>
              <tr valign="top">
                <td>Hidden layer loss rate</td>
                <td>0.4740</td>
                <td>(0.25, 0.35, 0.5)</td>
              </tr>
              <tr valign="top">
                <td>Learning rate</td>
                <td>0.4375</td>
                <td>(0, 1)</td>
              </tr>
              <tr valign="top">
                <td>L2 punishment rate</td>
                <td>0.000001566</td>
                <td>(0, 0.01)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Experimental Results</title>
        <p>To intuitively explain our experimental results, we start with a concrete example that illustrates how EHR notes lead to prescription orders. An example of this is shown in <xref rid="figure10" ref-type="fig">Figure 10</xref>. The left side shows a snapshot of the patient’s EHR. On the right side is a table showing a side-by-side comparison between a human-authored order and the prescription generated by our model. The physician’s order contains 12 ingredients, whereas the model’s order has 11. The first 5 ingredients are identical on both sides. The sixth ingredient from each side is the same, although they have different Chinese names. This is because the physician used a nickname for the herb. The remaining ingredients differ not only in name but also in substance. However, these 2 orders are still considered equivalent so far as the medical treatment effect is concerned. This is because in TCM terminology, a diagnosis must conclude with the name of the disease (illness) and a list of syndromes [<xref ref-type="bibr" rid="ref17">17</xref>]. In this particular case, the diagnosed disease is <italic>emaciation-thirst</italic>, with the primary syndrome being <italic>kidney and liver deficiency</italic> and the secondary syndrome being <italic>dampness and stasis</italic>. The first 6 herbal ingredients target the primary syndrome. The remaining ingredients in each prescription are for the treatment of the secondary syndrome called <italic>dampness and stasis</italic>. As these 2 orders are only slightly different in their ingredients for treating secondary syndrome, they are treated as the same prescription in our research.</p>
        <p>To further explain this prescription comparison, we present another picture, as shown in <xref rid="figure11" ref-type="fig">Figure 11</xref>. The physician’s order is called <italic>Qiju Dihuang pill</italic>, and the model’s order is called <italic>Liuwei Dihuang pill</italic>. They are category II prescriptions that belong to the same parent category TCM prescription called <italic>nourishing liver and kidney</italic>. They differ only in how to dispel dampness and resolve phlegm to address only the secondary syndrome.</p>
        <p>To evaluate the performance of the transformer-based predictive model, we first conducted model training using only the original samples, purposefully excluding the noise-added samples. The results are described in the following paragraphs.</p>
        <p>On the basis of the time sequences, the system produced prescription recommendations at admission, 24 hours after admission, 48 hours after admission, 3 days after admission, and 1 week after admission. The test results are shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <p>From <xref ref-type="table" rid="table2">Table 2</xref>, we first observe that the precision and recall rates obtained from the training data set are higher than their respective counterparts from the test data set. This is understandable as the model has seen the samples from the training data set before but not from the test data set. The second observation is that as time progresses, both the precision and recall rates improve. After admission, at each subsequent medication order time, more relevant information is collected, and the prediction becomes more accurate. Although the number of feature tokens was &#60;260 for 98% of the patients at the time of admission, this number increased to 296 in 24 hours, 333 in 48 hours, 366 in 72 hours, and 759 in 7 days. In our experiment, we set <italic>max_num_tokens</italic>=759<italic>.</italic> This means that when the number of feature tokens was &#60;759, zero padding was used, and clipping was used when there were &#62;759 feature tokens. Selecting the proper value for <italic>max_num_tokens</italic> is important for balancing the trade-off between overall system performance and computational efficiency. If the value is too large, training and inferencing will consume too much computation horsepower. If the value is too small, then some critical information gathered at admission will be lost because of clipping, leading to reduced precision and recall rates for prescription predictions at a time that is far from the admission time (eg, 2 weeks after admission).</p>
        <p>The second set of experimental results was obtained using more training samples to train the predictive model. The size of the training data set was doubled, as for every training sample, a noise-added sample was generated by the GAN network. The precision and recall rates are listed in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <p>As can be seen in <xref ref-type="table" rid="table3">Table 3</xref>, both the precision and recall rates consistently improved by a noticeable margin. The results convincingly prove that inserting noise-added training samples generated by the GAN module can effectively overcome the overfitting issue, leading to better prediction performance.</p>
        <fig id="figure10" position="float">
          <label>Figure 10</label>
          <caption>
            <p>Side-by-side comparison of physician’s order versus model’s order.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e35239_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure11" position="float">
          <label>Figure 11</label>
          <caption>
            <p>Prescription comparison: physician’s order versus model’s order.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e35239_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The precision rates and recall rates with transformer only.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="220"/>
            <col width="190"/>
            <col width="220"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td>Time</td>
                <td colspan="2">Training set</td>
                <td colspan="2">Test set</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision rate (%)</td>
                <td>Recall rate (%)</td>
                <td>Precision rate (%)</td>
                <td>Recall rate (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Admission</td>
                <td>81.58</td>
                <td>69.49</td>
                <td>73.82</td>
                <td>61.25</td>
              </tr>
              <tr valign="top">
                <td>In 24 hours</td>
                <td>83.37</td>
                <td>71.88</td>
                <td>74.56</td>
                <td>62.69</td>
              </tr>
              <tr valign="top">
                <td>In 48 hours</td>
                <td>83.92</td>
                <td>71.26</td>
                <td>74.81</td>
                <td>63.04</td>
              </tr>
              <tr valign="top">
                <td>In 3 days</td>
                <td>85.16</td>
                <td>73.89</td>
                <td>76.24</td>
                <td>65.38</td>
              </tr>
              <tr valign="top">
                <td>In 1 week</td>
                <td>87.02</td>
                <td>75.17</td>
                <td>77.94</td>
                <td>67.15</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>The precision rates and recall rates with transformer+generative adversarial network.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="220"/>
            <col width="190"/>
            <col width="220"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td>Time</td>
                <td colspan="2">Training set</td>
                <td colspan="2">Test set</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision rate (%)</td>
                <td>Recall rate (%)</td>
                <td>Precision rate (%)</td>
                <td>Recall rate (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Admission</td>
                <td>82.22</td>
                <td>70.65</td>
                <td>80.58</td>
                <td>68.49</td>
              </tr>
              <tr valign="top">
                <td>In 24 hours</td>
                <td>84.15</td>
                <td>72.18</td>
                <td>82.37</td>
                <td>70.8</td>
              </tr>
              <tr valign="top">
                <td>In 48 hours</td>
                <td>84.32</td>
                <td>72.56</td>
                <td>82.92</td>
                <td>70.26</td>
              </tr>
              <tr valign="top">
                <td>In 3 days</td>
                <td>87.04</td>
                <td>75.10</td>
                <td>85.04</td>
                <td>74.38</td>
              </tr>
              <tr valign="top">
                <td>In 1 week</td>
                <td>88.91</td>
                <td>76.79</td>
                <td>86.82</td>
                <td>76.23</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Comparison Study</title>
        <p>To compare the performance of our proposed model with that of existing prescription generation models, we implemented 3 other models. The CNN-based model [<xref ref-type="bibr" rid="ref11">11</xref>] comprises a word embedding layer, a convolution layer that contains 3 filters of different sizes, a pooling layer, and a full connection layer. The output layer contains 819 neurons, equal to the number of prescribed herb varieties. The seq2seq [<xref ref-type="bibr" rid="ref36">36</xref>] model comprises a CNN encoder and an LSTM decoder. The MedAR [<xref ref-type="bibr" rid="ref37">37</xref>] model comprises a word embedding layer, followed by an attention layer, and finally, a RethinkNet layer to complete the multilabel classification. The learning rate was 0.001, the dropout rate was 0.8, and the optimization function was Adam. The final output layer used the sigmoid function, whereas all other layers used the non-linear activation function ReLU, which outputs an input x as zero if x is negative, and outputs x itself if x is larger than or equal to zero. <xref ref-type="table" rid="table4">Table 4</xref> shows the respective precision and recall rates at admission for all 4 models in discussion. The results suggest that the proposed model has superior performance in terms of precision and recall rates.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Performance comparison for different models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="560"/>
            <col width="240"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Precision rate (%)</td>
                <td>Recall rate (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Convolutional neural network</td>
                <td>47.54</td>
                <td>31.00</td>
              </tr>
              <tr valign="top">
                <td>Seq2seq<sup>a</sup></td>
                <td>64.02</td>
                <td>48.74</td>
              </tr>
              <tr valign="top">
                <td>MedAR<sup>b</sup></td>
                <td>71.46</td>
                <td>53.08</td>
              </tr>
              <tr valign="top">
                <td> Transformer+generative adversarial network</td>
                <td>80.58</td>
                <td>68.49</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Seq2seq: sequence to sequence model.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>MedAR: Medical data attention Rethink Net.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The following tasks have been finished in this research:</p>
        <list list-type="order">
          <list-item>
            <p>Deep learning NLP techniques were used to convert raw Chinese EHR texts into feature representations.</p>
          </list-item>
          <list-item>
            <p>The major contribution of this study is the proposal of a transformer-based predictive modeling scheme for medication order generation from a feature representation of EHR data.</p>
          </list-item>
          <list-item>
            <p>The secondary contribution of this study is the use of GAN to augment the training data set, leading to a noticeable performance improvement of the predictive model. Using the GAN, noise-added samples were generated to double the number of original training samples. This helped alleviate the overfitting problem, making the model more robust in terms of generalization.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Despite the efforts made in many aspects of the diagnosis and treatment scheme recommendations, there is still much room for improvement. The training data set is still relatively small, and there may be some frequently used medicines that are not included in the training data set. The TCM prescription knowledge base is still incomplete. Some medicines do not have standard names, and no corresponding parent medicine name exists in the database. Therefore, the recommended medicine names are still the original hospital medicine names. For a multilabel prediction task, an increased number of labels will increase the difficulty of the model prediction and lower the prediction accuracy. Therefore, as a more complete knowledge base is developed, the label set will be further optimized, leading to a greater prediction accuracy of the model.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>This paper reports the preliminary research results of automated medication order generation from EHR texts for TCM inpatients. The recommended medicines include Western and Chinese medicines. For Chinese medicines, only the medicine names are recommended. In the future, the dosage of the herbal ingredients, as well as the medicine preparation instructions, will be included in the recommendations. Improving the model prediction accuracy to the level of category II is also a direction for future work. Future work could expand the training data set to optimize the model.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Python pseudocode for the training of a generative adversarial network with the Keras framework.</p>
        <media xlink:href="medinform_v10i5e35239_app1.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BiLSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">GAN</term>
          <def>
            <p>generative adversarial network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">TCM</term>
          <def>
            <p>traditional Chinese medicine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rindflesch</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Corn</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing: state of the art and prospects for significant progress, a workshop sponsored by the National Library of Medicine</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>10</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>765</fpage>
          <lpage>73</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00079-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.06.004</pub-id>
          <pub-id pub-id-type="medline">23810857</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(13)00079-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Farri</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Consoli</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Reforgiato Recupero</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Petković</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Clinical natural language processing with deep learning</article-title>
          <source>Data Science for Healthcare: Methodologies and Applications</source>
          <year>2019</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>147</fpage>
          <lpage>71</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Houssein</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Mohamed</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Machine learning techniques for biomedical natural language processing: a comprehensive review</article-title>
          <source>IEEE Access</source>
          <year>2021</year>
          <month>10</month>
          <day>13</day>
          <volume>9</volume>
          <fpage>140628</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2021.3119621</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>KP</given-names>
            </name>
          </person-group>
          <source>Machine Learning: A Probabilistic Perspective</source>
          <year>2012</year>
          <publisher-loc>Cambridge, MA, USA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shickel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tighe</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bihorac</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rashidi</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Deep EHR: a survey of recent advances in deep learning techniques for electronic health record (EHR) analysis</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2018</year>
          <month>09</month>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>1589</fpage>
          <lpage>604</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29989977"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/JBHI.2017.2767063</pub-id>
          <pub-id pub-id-type="medline">29989977</pub-id>
          <pub-id pub-id-type="pmcid">PMC6043423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhalishahi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rinaldi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>04</month>
          <day>27</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12239</pub-id>
          <pub-id pub-id-type="medline">31066697</pub-id>
          <pub-id pub-id-type="pii">v7i2e12239</pub-id>
          <pub-id pub-id-type="pmcid">PMC6528438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for healthcare: review, opportunities and challenges</article-title>
          <source>Brief Bioinform</source>
          <year>2018</year>
          <month>11</month>
          <day>27</day>
          <volume>19</volume>
          <issue>6</issue>
          <fpage>1236</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28481991"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbx044</pub-id>
          <pub-id pub-id-type="medline">28481991</pub-id>
          <pub-id pub-id-type="pii">3800524</pub-id>
          <pub-id pub-id-type="pmcid">PMC6455466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kidd</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Deep patient: an unsupervised representation to predict the future of patients from the electronic health records</article-title>
          <source>Sci Rep</source>
          <year>2016</year>
          <month>05</month>
          <day>17</day>
          <volume>6</volume>
          <fpage>26094</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/srep26094"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep26094</pub-id>
          <pub-id pub-id-type="medline">27185194</pub-id>
          <pub-id pub-id-type="pii">srep26094</pub-id>
          <pub-id pub-id-type="pmcid">PMC4869115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rough</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Vardoulakis</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Howell</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Rajkomar</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Predicting inpatient medication orders from electronic health record data</article-title>
          <source>Clin Pharmacol Ther</source>
          <year>2020</year>
          <month>07</month>
          <volume>108</volume>
          <issue>1</issue>
          <fpage>145</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32141068"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/cpt.1826</pub-id>
          <pub-id pub-id-type="medline">32141068</pub-id>
          <pub-id pub-id-type="pmcid">PMC7325318</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Predicting discharge medications at admission time based on deep learning</article-title>
          <source>arXiv (forthcoming)</source>
          <year>2017</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1711.01386"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Navar</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Pencina</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannidis</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>Opportunities and challenges in developing risk prediction models with electronic health records data: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>01</month>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>198</fpage>
          <lpage>208</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27189013"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw042</pub-id>
          <pub-id pub-id-type="medline">27189013</pub-id>
          <pub-id pub-id-type="pii">ocw042</pub-id>
          <pub-id pub-id-type="pmcid">PMC5201180</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajkomar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oren</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hajaj</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sundberg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duggan</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Irvine</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Litsch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mossin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tansuwan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wexler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ludwig</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Volchenboum</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Madabushi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Howell</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Scalable and accurate deep learning with electronic health records</article-title>
          <source>NPJ Digit Med</source>
          <year>2018</year>
          <month>5</month>
          <volume>1</volume>
          <fpage>18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-018-0029-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-018-0029-1</pub-id>
          <pub-id pub-id-type="medline">31304302</pub-id>
          <pub-id pub-id-type="pii">29</pub-id>
          <pub-id pub-id-type="pmcid">PMC6550175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jagannatha</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Bidirectional RNN for medical event detection in electronic health records</article-title>
          <source>Proc Conf</source>
          <year>2016</year>
          <month>06</month>
          <volume>2016</volume>
          <fpage>473</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27885364"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/n16-1056</pub-id>
          <pub-id pub-id-type="medline">27885364</pub-id>
          <pub-id pub-id-type="pmcid">PMC5119627</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Doctor AI: predicting clinical events via recurrent neural networks</article-title>
          <source>JMLR Workshop Conf Proc</source>
          <year>2016</year>
          <month>08</month>
          <volume>56</volume>
          <fpage>301</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28286600"/>
          </comment>
          <pub-id pub-id-type="medline">28286600</pub-id>
          <pub-id pub-id-type="pmcid">PMC5341604</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Using recurrent neural network models for early detection of heart failure onset</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>03</month>
          <day>01</day>
          <volume>24</volume>
          <issue>2</issue>
          <fpage>361</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27521897"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw112</pub-id>
          <pub-id pub-id-type="medline">27521897</pub-id>
          <pub-id pub-id-type="pii">ocw112</pub-id>
          <pub-id pub-id-type="pmcid">PMC5391725</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence-based traditional Chinese medicine assistive diagnostic system: validation study</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>06</month>
          <day>15</day>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>e17608</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/6/e17608/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17608</pub-id>
          <pub-id pub-id-type="medline">32538797</pub-id>
          <pub-id pub-id-type="pii">v8i6e17608</pub-id>
          <pub-id pub-id-type="pmcid">PMC7324998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhikui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jing</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jianing</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Research progress in data mining-based TCM diagnoses</article-title>
          <source>Chinese J Traditional Chinese Med</source>
          <year>2020</year>
          <volume>38</volume>
          <issue>12</issue>
          <fpage>1</fpage>
          <lpage>9</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Data mining technique in application of syndromes and prescriptions of Traditional Chinese medicine for diabetes (in Chinese)</article-title>
          <source>Med &#38; Pharm J Chin PLA</source>
          <year>2015</year>
          <volume>27</volume>
          <fpage>34</fpage>
          <lpage>38</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Page</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Motwani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Winograd</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The pagerank citation ranking: bringing order to the Web</article-title>
          <source>Proceedings of the 7th International Conference on World Wide Web</source>
          <year>1998</year>
          <conf-name>WWW7 '98</conf-name>
          <conf-date>1998</conf-date>
          <conf-loc>Brisbane, Australia</conf-loc>
          <fpage>161</fpage>
          <lpage>72</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Prescription recommendation algorithm of traditional Chinese medicine treatment of lung cancer based on complex network</article-title>
          <source>Lishizhen Med and Materia Medica Res</source>
          <year>2019</year>
          <volume>5</volume>
          <fpage>1257</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.doc88.com/p-2836116537369.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Exploration on generating Traditional Chinese medicine prescription from symptoms with an end-to-end method</article-title>
          <source>Proceedings of the 8th CCF International Conference on Natural Language Processing and Chinese Computing</source>
          <year>2019</year>
          <conf-name>NLPCC '19</conf-name>
          <conf-date>October 9–14, 2019</conf-date>
          <conf-loc>Dunhuang, China</conf-loc>
          <fpage>486</fpage>
          <lpage>98</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-32233-5_38</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vincent</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Representation learning: a review and new perspectives</article-title>
          <source>IEEE Trans Pattern Anal Mach Intell</source>
          <year>2013</year>
          <month>08</month>
          <volume>35</volume>
          <issue>8</issue>
          <fpage>1798</fpage>
          <lpage>828</lpage>
          <pub-id pub-id-type="doi">10.1109/TPAMI.2013.50</pub-id>
          <pub-id pub-id-type="medline">23787338</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Bidirectional LSTM-CRF models for sequence tagging</article-title>
          <source>arXiv</source>
          <year>2015</year>
          <month>08</month>
          <day>09</day>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1508.01991.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1508.01991</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lample</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ballesteros</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawakami</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dyer</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Neural architectures for named entity recognition</article-title>
          <source>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2016</year>
          <conf-name>NAACL '16</conf-name>
          <conf-date>June 12-17, 2016</conf-date>
          <conf-loc>San Diego, CA, USA</conf-loc>
          <fpage>260</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/n16-1030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Habibi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Neves</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wiegandt</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Leser</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Deep learning with word embeddings improves biomedical named entity recognition</article-title>
          <source>Bioinformatics</source>
          <year>2017</year>
          <month>07</month>
          <day>15</day>
          <volume>33</volume>
          <issue>14</issue>
          <fpage>i37</fpage>
          <lpage>48</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28881963"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btx228</pub-id>
          <pub-id pub-id-type="medline">28881963</pub-id>
          <pub-id pub-id-type="pii">3953940</pub-id>
          <pub-id pub-id-type="pmcid">PMC5870729</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rongqing</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yujie</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Entity recognition of Chinese electronic medical records based on BiLSTM-CRF network and dictionary resources</article-title>
          <source>J Modern Inf</source>
          <year>2020</year>
          <volume>40</volume>
          <issue>4</issue>
          <fpage>3</fpage>
          <lpage>12</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Named entity recognition in Chinese electronic medical records using transformer-CRF (in Chinese)</article-title>
          <source>Computer Engineering and Applications</source>
          <year>2020</year>
          <volume>2020</volume>
          <issue>56</issue>
          <fpage>153</fpage>
          <lpage>9</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <source>Proceedings of the 2013 International Conference on Learning Representations</source>
          <year>2013</year>
          <conf-name>ICLR '13</conf-name>
          <conf-date>May 2-4, 2013</conf-date>
          <conf-loc>Scottsdale, AZ, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <month>06</month>
          <day>12</day>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1706.03762</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Pouget-Abadie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mirza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Warde-Farley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ozair</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Generative adversarial networks</article-title>
          <source>Proceedings of the 27th Advances in Neural Information Processing Systems</source>
          <year>2014</year>
          <conf-name>NeurIPS '14</conf-name>
          <conf-date>December 8-13, 2014</conf-date>
          <conf-loc>Montreal, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3422622</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>On standardization of basic datasets of electronic medical records in traditional Chinese medicine</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2019</year>
          <month>06</month>
          <volume>174</volume>
          <fpage>65</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2017.12.024</pub-id>
          <pub-id pub-id-type="medline">29292098</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(17)30756-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>Specification for drafting of health information basic dataset (WS 445-2014)</article-title>
          <source>Ministry of Health of the People’s Republic of China</source>
          <year>2014</year>
          <access-date>2022-05-04</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://wenku.baidu.com/view/4c17892d760bf78a6529647d27284b73f342365b.html">https://wenku.baidu.com/view/4c17892d760bf78a6529647d27284b73f342365b.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <article-title>Specification for sharing documents of electronic medical record - part 1: medical record summary (WS/T 500.1-2016)</article-title>
          <source>Ministry of Health of the People’s Republic of China</source>
          <year>2016</year>
          <access-date>2022-04-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ishare.iask.sina.com.cn/f/rIcgxKYJb9.html">https://ishare.iask.sina.com.cn/f/rIcgxKYJb9.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>TCM: made in China</article-title>
          <source>Nature</source>
          <year>2011</year>
          <month>12</month>
          <day>21</day>
          <volume>480</volume>
          <issue>7378</issue>
          <fpage>S82</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1038/480S82a</pub-id>
          <pub-id pub-id-type="medline">22190085</pub-id>
          <pub-id pub-id-type="pii">480S82a</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Research on the recommendation method of traditional Chinese medicine dynamic diagnosis and treatment plan based on real-world clinical data</article-title>
          <source>Beijing Jiaotong University</source>
          <year>2020</year>
          <access-date>2022-05-04</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.docin.com/p-2607021734.html">https://www.docin.com/p-2607021734.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiaolu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Application of deep learning in electronic health records data</article-title>
          <source>Xiamen University</source>
          <year>2019</year>
          <access-date>2022-04-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://kns.cnki.net/kcms/detail/detail.aspx?dbcode=CMFD&#38;dbname=CMFD202002&#38;filename=1019064889.nh&#38;uniplatform=NZKPT&#38;v=JdsETSDw9TG7mLWRMKxEtYFZWzSE1ntYAQYDF6L2Z3Tl3ccV-citYLrD1g30mRob">https://kns.cnki.net/kcms/detail/detail.aspx?dbcode=CMFD&#38;dbname=CMFD202002&#38;filename=1019064889.nh&#38;uniplatform=NZKPT&#38;v=JdsETSDw9TG7mLWRMKxEtYFZWzSE1ntYAQYDF6L2Z3Tl3ccV-citYLrD1g30mRob</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
