<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i1e14971</article-id>
      <article-id pub-id-type="pmid">31939742</article-id>
      <article-id pub-id-type="doi">10.2196/14971</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Generating Medical Assessments Using a Neural Network Model: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Torii</surname>
            <given-names>Manabu</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>del Pozo Banos</surname>
            <given-names>Marcos</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>Baotian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7490-684X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Bajracharya</surname>
            <given-names>Adarsha</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1691-0506</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Hong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>University of Massachusetts Lowell</institution>
            <addr-line>1 University Ave</addr-line>
            <addr-line>Lowell, MA, 01854</addr-line>
            <country>United States</country>
            <phone>1 5086127292</phone>
            <email>Hong_Yu@uml.edu</email>
          </address>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9263-5035</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>University of Massachusetts Lowell</institution>
        <addr-line>Lowell, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Medicine</institution>
        <institution>University of Massachusetts Medical School</institution>
        <addr-line>Worcester, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Bedford Veterans Affairs Medical Center</institution>
        <addr-line>Bedford, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>School of Computer Science</institution>
        <institution>University of Massachusetts Amherst</institution>
        <addr-line>Amherst, MA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hong Yu <email>Hong_Yu@uml.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>1</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>1</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>1</issue>
      <elocation-id>e14971</elocation-id>
      <history>
        <date date-type="received">
          <day>7</day>
          <month>6</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>1</day>
          <month>7</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>28</day>
          <month>9</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>10</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Baotian Hu, Adarsha Bajracharya, Hong Yu. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 15.01.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/1/e14971/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Since its inception, artificial intelligence has aimed to use computers to help make clinical diagnoses. Evidence-based medical reasoning is important for patient care. Inferring clinical diagnoses is a crucial step during the patient encounter. Previous works mainly used expert systems or machine learning–based methods to predict the International Classification of Diseases - Clinical Modification codes based on electronic health records. We report an alternative approach: inference of clinical diagnoses from patients’ reported symptoms and physicians’ clinical observations.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to report a natural language processing system for generating medical assessments based on patient information described in the electronic health record (EHR) notes.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We processed EHR notes into the Subjective, Objective, Assessment, and Plan sections. We trained a neural network model for medical assessment generation (N2MAG). Our N2MAG is an innovative deep neural model that uses the Subjective and Objective sections of an EHR note to automatically generate an “expert-like” assessment of the patient. N2MAG can be trained in an end-to-end fashion and does not require feature engineering and external knowledge resources.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We evaluated N2MAG and the baseline models both quantitatively and qualitatively. Evaluated by both the Recall-Oriented Understudy for Gisting Evaluation metrics and domain experts, our results show that N2MAG outperformed the existing state-of-the-art baseline models.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>N2MAG could generate a medical assessment from the Subjective and Objective section descriptions in EHR notes. Future work will assess its potential for providing clinical decision support.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>electronic health record note</kwd>
        <kwd>medical assessment generation</kwd>
        <kwd>deep neural network model</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Electronic health record (EHR) systems have been widely adopted by hospitals in the United States and other countries [<xref ref-type="bibr" rid="ref1">1</xref>], resulting in an unprecedented amount of digital data or EHRs associated with patient encounters [<xref ref-type="bibr" rid="ref2">2</xref>]. The primary function of EHRs is to document patients’ clinical information and share them among health care providers for patient care. Rich clinical information is represented in the EHRs. In recent years, secondary use of EHRs has helped advance EHR-related computational approaches [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      <p>EHR notes are written by providers who care for their patients. Providers are trained to write notes with a problem-oriented SOAP (Subjective, Objective, Assessment, and Plan) structure [<xref ref-type="bibr" rid="ref5">5</xref>] along with the Header, which records patients’ necessary information such as name, date of birth, and reason for visit or chief complaint. <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> shows an illustrative example of a SOAP note for an outpatient encounter. Typically, the subjective section describes patients’ current condition(s), either as patients’ self-reports or physicians’ summaries of previous and pertinent clinical conditions relevant to the chief complaints. This includes medical history, surgical history, family history, and social history along with current medications, smoking status, and drug/alcohol/caffeine use. The Objective section includes clinical conditions, measurements, and observations from patients’ laboratory, physical, and other examinations that are noted during the clinic visit when the note was created. The assessment section typically contains medical diagnoses and summaries of the key elements that lead to the medical diagnoses. Following the diagnoses, physicians lay out the plan for treatment or differential diagnosis, including ordering labs (for differential diagnosis), radiological referrals, performing procedures, and prescribing medications.</p>
      <boxed-text id="box1" position="float">
        <title>A typical SOAP (Subjective, Objective, Assessment, and Plan) electronic health record note (deidentified).</title>
        <p><bold>Header:</bold> Umass memorial medical center patient:&#60;patient name&#62; &#60;acct.#&#62; &#60;mr#&#62; &#60;date of birth&#62; &#60;date of service&#62; &#60;address&#62; &#60;physician name&#62; &#60;dictation date&#62; clinic note reason for visit: postoperative visit status post open reduction and percutaneous pinning of right small finger metacarpal neck fracture. </p>
        <p><bold>Subjective:</bold> this is a very pleasant 28-year-old gentleman that we have been following and treating for right small finger metacarpal neck fracture sustained on 03/04/2016 . he feels well . he has been working very closely with hand therapy . he has increased his extension of his small finger. he has not really worked on his grip as of yet .</p>
        <p><bold>Objective:</bold> physical examination: the scar is well healed externally , although it does feel like there is some prominent scar tissue in the deep soft tissues . he is able to better extend his small finger , although there is still a small amount of extensor lag at rest. his sensation otherwise is intact on the radial and ulnar aspects of his finger . radiographs : three views of his hand are taken today and his metacarpal appears better aligned compared to before . he has exhibited bony healing and on the whole , the alignment is acceptable .</p>
        <p><bold>Assessment:</bold> healing well status post open reduction and percutaneous pinning of right small finger metacarpal fracture.</p>
        <p><bold>Plan:</bold> the patient should continue working with hand therapy and at this point, he is 8  weeks out. he may begin some light strengthening with a target date for weightbearing around the 10 to 12-week mark. I have advised him that if it bothers him that he cannot fully extend his small finger secondary to scar tissue, we can always try to perform a tenolysis of the tendon in the future. He wishes to hold off on this and I will plan to see him back in about 2 months. 
</p>
      </boxed-text>
      <p>Rich clinical knowledge can be inferred from EHRs with such a SOAP structure. In this case, the chief complaint and subjective evidence lead to objective measurements. Assessments are inferred from both subjective and objective evidence and lead to specific plans. As illustrated in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>, the assessment typically contains two components: (1) a summary of the main conditions, and (2) the diagnoses or likely diagnoses, typically in order from the most likely to the least likely.</p>
      <p>Inferring clinical diagnoses is a crucial step during the patient encounter. In the clinical domain, natural language processing (NLP) apps have mainly focused on adverse event detection [<xref ref-type="bibr" rid="ref6">6</xref>], named entity recognition [<xref ref-type="bibr" rid="ref7">7</xref>], and relation identification [<xref ref-type="bibr" rid="ref8">8</xref>]. A closely related system is automated International Classification of Diseases (ICD) code assignment, where these models employ machine learning approaches to predict ICD-Clinical Modification (CM) codes [<xref ref-type="bibr" rid="ref9">9</xref>]. However, ICD-CM codes are created mainly for billing purposes and have limitations (eg, incomplete assignment [<xref ref-type="bibr" rid="ref10">10</xref>]) when used as the gold standard for diagnosis labels. In this study, we propose a complementary approach. We built an expert system by directly learning clinical knowledge from SOAP notes to generate medical assessments and diagnoses. Unlike previous expert systems that mainly comprise predefined diagnosis categories, our system generates assessment that is described in natural language.</p>
      <p>Automatically generating medical assessment is a challenging task in both computer science and medicine. Both subjective and objective components in a SOAP note are generally verbose, containing abundant medical jargon, much of which is sparse (with low term frequency) and therefore considered as out-of-vocabulary words. EHR narratives also use irregular natural language, including broken sentence structures, and are written by different physicians with different writing styles, many of whom have been trained outside the United States.</p>
      <p>Our computation model for medical assessment generation is based on our observation that the medical assessment generation task is partially analogous to the abstractive text summarization tasks. In recent years, much progress has been made on neural abstractive summarizations [<xref ref-type="bibr" rid="ref11">11</xref>]. The canonical neural sequence-to-sequence model uses recurrent neural network (RNN) to encode an input document and another RNN as a decoder with an attention mechanism to generate the target text [<xref ref-type="bibr" rid="ref12">12</xref>]. State-of-the-art models have been proposed in recent years, such as the copy mechanism [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] and coverage mechanism [<xref ref-type="bibr" rid="ref15">15</xref>]. These models have demonstrated advances for generating long-document summarization [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      <p>In this study, we explored these aforementioned state-of-the-art models as baseline models for Assessment generation. Our innovative approach is as follows: In addition to depending on the Subjective and Objective descriptions, the Assessment generation is conditioned on the chief complaint(s), which is the reason that a patient seeks medical treatment. Therefore, our NN model for medical assessment generation (N2MAG) augments the pointer-generator network proposed by See et al [<xref ref-type="bibr" rid="ref16">16</xref>], with an innovative attention-over-attention model. Thus, the chief complaints information in the Header section could be used to infer assessment. Evaluation of 953 patients’ EHR notes shows that N2MAG can generate natural and fluent assessment, significantly outperforming competitive baseline models by using both the Recall-Oriented Understudy for Gisting Evaluation (ROUGE) evaluation metrics and physicians’ evaluation.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>The Overall Architecture</title>
        <p>N2MAG merges the narrative text <italic>X</italic> in subjective and objective sections as an input document, denoted as a sequence of words (f<sub>1</sub>, f<sub>2</sub>...f<sub>n</sub>). Its header section, <italic>T,</italic> is represented by a sequence of words (w<sub>1</sub>, w<sub>2</sub>...w<sub>m</sub>). The goal of N2MAG is to generate the assessment, <italic>Y</italic>, consisting of a word sequence (y<sub>1</sub>, y<sub>2</sub>...y<sub>l</sub>), given <italic>X</italic> and <italic>T</italic>. As illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>, N2MAG has three components: the encoder of subjective and objective sections (the main encoder), the encoder of the header section, and the decoder that generates medical assessment.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Illustration of the Neural Model for Medical Assessment Generation (N2MAG).</p>
          </caption>
          <graphic xlink:href="medinform_v8i1e14971_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>This study obtained approval from the Institutional Review Board at the University of Massachusetts Medical School.</p>
      </sec>
      <sec>
        <title>The Main Encoder</title>
        <p>The N2MAG uses a single-layer, bidirectional long short-term memory (LSTM) neural network [<xref ref-type="bibr" rid="ref17">17</xref>] to encode the input text (ie, the subjective and objective sections). LSTM is commonly used for sequence-related applications [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. The sequence of words in subjective and objective sections <italic>X</italic> is first mapped to a sequence of word vectors (x<sub>1</sub>...x<sub>n</sub>), by looking up the word embedding matrix M<sup>dx&#124;V&#124;</sup>, where d denotes the dimension of word embeddings and &#124;V&#124; denotes the size of vocabulary. The word vector <italic>x<sub>i</sub></italic> is then fed into the bidirectional LSTM (denoted as LSTM<sub>source</sub>) one by one, which produces a sequence of encoder hidden states [h<sub>1</sub>…h<sub>n</sub>], denoted as H. The subjective and objective text is therefore represented as a sequence of hidden states <italic>H</italic>.</p>
      </sec>
      <sec>
        <title>The Encoder of the Header Section</title>
        <p>For the canonical neural sequence to sequence model, there is only one encoder, that is, LSTM<sub>source</sub>. However, for medical assessment generation, the Header section contains valuable information (eg, chief complaints), which is useful for assessment generation. In order to encode the Header section, N2MAG uses another bidirectional LSTM denoted as LSTM<sub>header</sub>. Similar to the encoder of the subjective and objective sections, the sequence of words in the Header section <italic>T</italic> is first mapped to a sequence of word vectors (t<sub>1</sub>…t<sub>m</sub>) denoted as <italic>T</italic>. The word vector <italic>t<sub>i</sub></italic> is then fed into the encoder LSTM<sub>header</sub> one by one, which produces a sequence of encoder hidden states [z<sub>1</sub>…z<sub>m</sub>], denoted as <italic>Z</italic>:</p>
        <disp-formula>Z=LSTM<sub>header</sub> (t<sub>1</sub>...t<sub>m</sub>) (1)</disp-formula>
        <p>For N2MAG, <italic>Z</italic> will be used by the decoder to fetch more accurate information from the subjective and objective input sections.</p>
      </sec>
      <sec>
        <title>The Decoder of Assessment</title>
        <p>The decoder of N2MAG is a single-layer LSTM. It generates words one by one from the given start symbol &#60;/begin&#62; and terminates when &#60;/end&#62; is generated or the maximum decoding length is reached. At each step, the decoder LSTM receives the word embedding of the previous word to produce the decode state s<sub>i</sub>.</p>
        <p>The decoder of N2MAG first uses s<sub>i</sub> to attend to the hidden states <italic>Z</italic> of the Header section encoder. The attention distribution on <italic>Z</italic> can be calculated as Equation 2, where <italic>z<sub>j</sub></italic> is the encoder hidden state of the jth word in the header section.</p>
        <disp-formula><inline-graphic xlink:href="medinform_v8i1e14971_fig12.png" xlink:type="simple" mimetype="image"/> (2)</disp-formula>
        <disp-formula>ε<sub>ij</sub>=V<sup>T</sup>tanh(W<sub>Z</sub>z<sub>j</sub>+W<sub>S</sub>s<sub>i</sub>+b<sub>z</sub>) (3)</disp-formula>
        <p>The patient’s information <italic>z<sub>i</sub></italic><sup>*</sup>, which the decoder attended to during the decoding step <italic>i</italic>, can be calculated as Equation 4:</p>
        <disp-formula>z<sub>i</sub><sup>*</sup>=Σ<sup>m</sup><sub>k=1</sub>α<sub>ik</sub> z<sub>k</sub> (4)</disp-formula>
        <p>where V, W<sub>Z</sub>, W<sub>S</sub>, and b<sub>Z</sub> are learnable parameters.</p>
        <p>In the next step, N2MAG uses <italic>s<sub>i</sub></italic> and <italic>z<sub>i</sub><sup>*</sup></italic> to attend to the hidden states <italic>H</italic>. The attention probability of <italic>h<sub>j</sub></italic> on the decoding step <italic>i</italic> is calculated as Equation 5. The attention distribution <italic>β<sub>i*</sub></italic> of <italic>H</italic> on the decoding step <italic>i</italic> can be represented as (β<sub>i1</sub>...β<sub>in</sub>).</p>
        <disp-formula><inline-graphic xlink:href="medinform_v8i1e14971_fig13.png" xlink:type="simple" mimetype="image"/> (5)</disp-formula>
        <disp-formula><inline-graphic xlink:href="medinform_v8i1e14971_fig3.png" xlink:type="simple" mimetype="image"/> (6)</disp-formula>
        <p>where <inline-graphic xlink:href="medinform_v8i1e14971_fig4.png" xlink:type="simple" mimetype="image"/> are learnable parameters.</p>
        <p>N2MAG uses the attention distribution <italic>β<sub>i*</sub></italic> to fetch information <italic>h<sub>i</sub><sup>*</sup></italic> from the subjective and objective sections, which can be calculated as mentioned in Equation 7:</p>
        <disp-formula>h<sub>i</sub><sup>*</sup>=Σ<sup>n</sup><sub>k=1</sub>β<sub>ik</sub>h<sub>k</sub> (7) </disp-formula>
        <p>This equation allows N2MAG to consider both the current decoder state and the patient’s information to fetch information from the subjective and objective sections, which can be viewed as the attention-over-attention mechanism. Generally, the current decoder state s<sub>i</sub> is to inform the decoder of which types of information are to be fetched. The <italic>z<sub>i</sub><sup>*</sup></italic> forces the decoder to target at a more specific location.</p>
        <p>To handle out-of-vocabulary words in EHR notes, N2MAG also uses copying or pointing mechanisms [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. The copying mechanism allows the network to copy words from the source text. N2MAG first computes the probability <italic>p<sup>i</sup><sub>gen</sub></italic> of generating a word from the predefined vocabulary on decoding step <italic>i</italic>, which can be formulated as Equation 8.</p>
        <disp-formula>p<sup>i</sup><sub>gen</sub>=σ(W’<sub>h*</sub>h<sup>*</sup><sub>i</sub>+ W’<sub>S</sub>s<sub>i</sub>+ W’<sub>y</sub> y<sub>i-1</sub>+b’) (8)</disp-formula>
        <p>where W’<sub>h*</sub>, W’<sub>S</sub>, W’<sub>y</sub>, and scalar b’ are learnable parameters; <italic>p<sup>i</sup><sub>gen</sub></italic> is then used as a soft gate to decide whether to sample a word from the distribution on predefined vocabulary or from the attention distribution <italic>β<sub>i*</sub></italic>. The final probability of the word <italic>w</italic> output by the decoder on decoding step <italic>i</italic> can be formulated as Equation 9:</p>
        <disp-formula>p<sup>i</sup>(w)= p<sup>i</sup><sub>gen</sub> * p<sup>i</sup><sub>voc</sub>(w)+(1- p<sup>i</sup><sub>gen</sub>)*Σ<sup>n</sup><sub>j=1</sub>1(w<sub>j</sub>=w)* β<sub>ij</sub> (9) </disp-formula>
        <p>where 1(w<sub>j</sub>=w) equals to 1, if the <italic>j</italic>th word is in the subjective and objective section <italic>X</italic> and is the word <italic>w</italic>. Otherwise, 1(w<sub>j</sub>=w) equals to 0; <italic>p<sup>i</sup><sub>voc</sub>(w)</italic> is the probability of sampling word <italic>w</italic> from the predefined vocabulary on decoding step <italic>i</italic>; and <italic>p<sup>i</sup><sub>voc</sub></italic> is the word distribution on predefined vocabulary on decoding step <italic>i</italic>, which can be computed in Equation 10:</p>
        <disp-formula><inline-graphic xlink:href="medinform_v8i1e14971_fig5.png" xlink:type="simple" mimetype="image"/> (10)</disp-formula>
        <p>where <inline-graphic xlink:href="medinform_v8i1e14971_fig6.png" xlink:type="simple" mimetype="image"/> are learnable parameters.</p>
        <p>In summary, our N2MAG uses both the attention-over-attention and copying mechanisms. The attention-over-attention can facilitate the decoder to locate more accurate information from the narrative text. The copying mechanism can alleviate the out-of-vocabulary problems during decoding.</p>
      </sec>
      <sec>
        <title>Training</title>
        <p>The parameters <italic>θ</italic> of the N2MAG includes four parts: the word embedding matrix <italic>M</italic>, the parameter <italic>θ<sub>1</sub></italic> of <inline-graphic xlink:href="medinform_v8i1e14971_fig10.png" xlink:type="simple" mimetype="image"/><sub>source</sub>, the parameter <italic>θ<sub>2</sub></italic> of <inline-graphic xlink:href="medinform_v8i1e14971_fig10.png" xlink:type="simple" mimetype="image"/><sub>header</sub>, and the parameter <italic>θ<sub>3</sub></italic> for the decoder of assessment. The probability of generating reference assessment <italic>Y</italic> can be formulated in Equation 11:</p>
        <disp-formula>P(Y&#124;X,T; θ)=∏<sup>l</sup><sub>i=1</sub>P<sup>i</sup>(y<sub>i</sub>) (11) </disp-formula>
        <p>The negative log-likelihood loss for generating the reference assessment <italic>Y</italic> is calculated as Equation 12:</p>
        <disp-formula>Loss<sub>nll</sub>(Y&#124;X,T;θ)=–Σ<sup>l</sup><sub>i=1</sub>log(P<sup>i</sup>(y<sub>i</sub>))/l (12)</disp-formula>
        <p>Equation 12 is the basic loss used in N2MAG. Our loss function is based on the recent research on the neural sequence-to-sequence models such as minimum risk training [<xref ref-type="bibr" rid="ref19">19</xref>], cost weighting [<xref ref-type="bibr" rid="ref20">20</xref>], and coverage mechanism [<xref ref-type="bibr" rid="ref15">15</xref>]. Since clinical content integrity is very important for making a diagnosis, we chose the coverage mechanism, which forces the model to attend to the different locations of source text instead of one. On the decoding step <italic>i</italic>, the decoder uses the Equation 13 mentioned below to compute the vector (c<sub>i1</sub>…c<sub>in</sub>) denoted as <italic>c<sub>i*</sub></italic>, whose dimension equals the length of the subjective and objective text. In addition, <italic>c<sub>i*</sub></italic> is used to record the accumulative attention degree of each word until the decoding step <italic>i</italic>:</p>
        <disp-formula>c<sub>i*</sub>=Σ<sub>k=1</sub><sup>i-1</sup>β<sub>k*</sub> (13) </disp-formula>
        <p>Then, <italic>c<sub>i*</sub></italic> is added to equation 6 as an extra factor. Hence, equation 6 is modified to Equation 14 as follows:</p>
        <disp-formula><inline-graphic xlink:href="medinform_v8i1e14971_fig7.png" xlink:type="simple" mimetype="image"/> (14)</disp-formula>
        <p>where <inline-graphic xlink:href="medinform_v8i1e14971_fig8.png" xlink:type="simple" mimetype="image"/> is the extra learnable parameter. Therefore, in the training period, the learnable parameter θ’ includes two parts <inline-graphic xlink:href="medinform_v8i1e14971_fig9.png" xlink:type="simple" mimetype="image"/>. We use the coverage loss Loss<sub>cov</sub> as Equation 15:</p>
        <disp-formula>Loss<sub>cov</sub>(Y&#124;X,T;θ<sup>’</sup>)= Σ<sup>l</sup><sub>k=1</sub>Σ<sup>n</sup><sub>j=1</sub> min(β<sub>kj,</sub> c<sub>kj</sub>) (15)</disp-formula>
        <p>Finally, the coverage loss Loss<sub>cov</sub> and negative log-likelihood loss Loss<sub>nll</sub>(Y&#124;X,T;θ) are linearly combined with hyperparameter λ as Equation 16.</p>
        <disp-formula>Loss(Y&#124;X,T; θ<sup>’</sup>)= Loss<sub>nll</sub>(Y&#124;X,T;θ)+λLoss<sub>cov</sub>(Y&#124;X,T;θ<sup>’</sup>) (16)</disp-formula>
        <p>The λLoss<sub>cov</sub>(Y&#124;X,T;θ<sup>’</sup>) can be viewed as the model regularization factor. It can prevent N2MAG from overfitting on specific local parts. In practice, we first train N2MAG with the loss Loss<sub>nll</sub>(Y&#124;X,T;θ) until it converges on the validation set. Subsequently, we incorporate the coverage mechanism into pretrained N2MAG and continue to train it with the loss Loss(Y&#124;X,T;θ<sup>’</sup>).</p>
      </sec>
      <sec>
        <title>Experiments and Systems</title>
        <sec>
          <title>Dataset</title>
          <p>Our EHR data comprise 235,458 outpatient EHR notes from the University of Massachusetts Memorial Medical Center, from which we randomly selected 233,470, 1,035, and 953 notes for training, development, and test sets, respectively. As described previously, a typical structure of EHR notes includes the Header and SOAP sections, as shown in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>, although variations exist. For example, in some notes, Subjective and Objective sections are not explicitly marked, but the relevant content is described in other sections such as “History of present illness.” To address the variations, we simply aggregated the text between “History of present illness” and “Assessment” as the “Subjective” and “Objective” sections.</p>
        </sec>
        <sec>
          <title>Models</title>
          <p>We compare N2MAG with the state-of-the-art neural sequence-to-sequence models. The detailed setups of the baseline and our N2MAG models are described as follows:</p>
          <list list-type="bullet">
            <list-item>
              <p>
        Seq2Seq+att: Seq2Seq+att is the model proposed by Bahdanau et al [<xref ref-type="bibr" rid="ref12">12</xref>], which is commonly used as the benchmark model for sequence-to-sequence tasks.
    </p>
            </list-item>
            <list-item>
              <p>
        Pointer-generator (PG): PG [<xref ref-type="bibr" rid="ref16">16</xref>] is the state-of-the-art model for document summarization. It incorporates the copying mechanism on the Seq2Seq+att model.
    </p>
            </list-item>
            <list-item>
              <p>
        PG+Coverage: PG+Coverage is proposed by See et al [<xref ref-type="bibr" rid="ref16">16</xref>]. It incorporates the coverage mechanism based on the pretrained PG. The hyperparameter λ is set to 0.2.
    </p>
            </list-item>
            <list-item>
              <p>
        N2MAG: N2MAG is trained with negative log-likelihood loss Loss<sub>nll</sub>(Y&#124;X,T;θ).
    </p>
            </list-item>
            <list-item>
              <p>
        N2MAG+Coverage: It incorporates the coverage mechanism based on the pretrained N2MAG and is continuously trained with loss Loss(Y&#124;X,T; θ’). The hyperparameter λ is set to 0.2.
    </p>
            </list-item>
          </list>
        </sec>
        <sec>
          <title>Settings</title>
          <p>All aforementioned models use LSTM as both the encoder and decoder to train on the same training set. All the hyperparameters are chosen empirically. The dimension of the hidden state is set to 200, and the embedding dimension is set to 128. All the parameters are randomly initialized. The vocabulary size is set to 100,000. We take the tokens that contain digits as out-of-vocabulary words and add the digits “0-9” to the vocabulary. During training and testing, we truncate the subjective and objective sections to 500 tokens and limit the length of the assessment section to 60 tokens for training. For N2MAG and N2MAG+Coverage, we truncate the Header section to 100 tokens. All these models are trained using Adagrad [<xref ref-type="bibr" rid="ref21">21</xref>] with a learning rate of 0.12 and an initial accumulator value of 0.11. We use the loss on the validation set to implement early stopping [<xref ref-type="bibr" rid="ref22">22</xref>]. At the test time, all the models produce assessment using beam search with a beam size of 10, the minimum decoding length is set to 15, and the maximum decoding length is set to 60.</p>
        </sec>
      </sec>
      <sec>
        <title>Evaluation</title>
        <sec>
          <title>Recall-Oriented Understudy for Gisting Evaluation</title>
          <p>Recall-Oriented Understudy for Gisting Evaluation (ROUGE) [<xref ref-type="bibr" rid="ref23">23</xref>] is commonly used to evaluate document summarization models and has been proven to be strongly correlated with human evaluation results. We therefore use ROUGE to evaluate N2MAG and other baseline models.</p>
          <p>There are multiple variants of ROUGE scores. Among them, ROUGE-1 (R-1), ROUGE-2 (R-2), and ROUGE-L (R-L) are the most commonly used ones. ROUGE-n (R-n) can be computed as Equation 17 below:</p>
          <disp-formula><graphic xlink:href="medinform_v8i1e14971_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/> (17)</disp-formula>
          <p>where <italic>n</italic> stands for the length of the n-gram, Count<sub>match</sub>(gram<sub>n</sub>) is the maximum number of n-grams co-occurring in both the generated assessment and the reference. Similarly, we could compute the R-n precision and F<sub>1</sub>. R-1 and R-2 are special cases of R-n, in which n=1 or n=2. R-L is instead computed based on the length of the longest common subsequence between the candidate assessment and the reference. In this work, we use F<sub>1</sub> of R-1, R-2, and R-L as our evaluation.</p>
        </sec>
        <sec>
          <title>Expert Evaluation</title>
          <p>We also conducted a qualitative evaluation to compare the N2MAG+Coverage model with the PG+Coverage model, since both models have competitive performance based on our quantitative evaluation results. We randomly sampled 50 patients’ EHR notes from the test set and asked two unbiased physicians who were not privy to the reasons, to evaluate the quality of the generated assessments. Specifically, for each EHR note, we presented three assessments (the doctor’s assessment, assessments produced by N2MAG+Coverage, and PG+Coverage) to two physicians. To ensure fairness, the order of the three assessments for each EHR note was randomized. In order to eliminate bias against computer-generated outputs, we informed the physician evaluators that all three assessments are outputs by a machine. The score ranged from 1 to 5, where 1 denotes “the worst” and 5 denotes “the best.”</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table1">Table 1</xref> shows the performance comparison between our models and the baseline models. The results show that both N2MAG and PG with the copying mechanism outperformed the Seq2Seq+att model. Our manual analysis concluded that the copying mechanism can mitigate data sparsity. Specifically, even with a large vocabulary, the Seq2Seq+att models failed to generate some words (such as the patient’s name and age), while the models (PG and N2MAG) with copying mechanism could generate these words. Although it is common for doctors to describe patients’ basic information (such as name and age), such information represents the rare word challenge. This is also one of the reasons that Seq2Seq+att performed poorly based on ROUGE.</p>
      <p>The results also show that PG+Coverage and N2MAG+Coverage outperformed their corresponding PG and N2MAG models. The results demonstrate that the coverage mechanism can boost the model to comprehend patients’ EHR notes as a whole instead of only focusing on some specific text. These results conclude that both the copying and coverage mechanisms benefit PG and N2MAG performance, which is in line with the previous research in the NLP domain, such as document summarization [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref16">16</xref>] and machine translation [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      <p><xref ref-type="table" rid="table1">Table 1</xref> shows that both N2MAG and N2MAG+Coverage, which use the attention-over-attention mechanism to incorporate the patients’ basic information, outperformed PG and PG+Coverage. The results support our intuition that patients’ chief complaint information is valuable. For example, in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>, the “reason for visit” clearly shows that the main purpose of the patient’s visit is “postoperative visit status post open reduction and percutaneous pinning of right small finger metacarpal neck fracture.” Our attention-over-attention mechanism allowed the models to condition on the chief complaint and therefore generated better assessments.</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Performance results evaluated with the F<sub>1</sub> ROUGE scores (%). All scores of N2MAG and N2MAG+Coverage are statistically significant using 95% CIs with respect to competitor models.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="352"/>
          <col width="199"/>
          <col width="199"/>
          <col width="250"/>
          <thead>
            <tr valign="bottom">
              <td>Model</td>
              <td>ROUGE<sup>a</sup>-1</td>
              <td>ROUGE-2</td>
              <td>ROUGE-L</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Seq2Seq+att</td>
              <td>37.4</td>
              <td>20.3</td>
              <td>34.7</td>
            </tr>
            <tr valign="top">
              <td>PG<sup>b</sup></td>
              <td>38.6</td>
              <td>22.5</td>
              <td>35.8</td>
            </tr>
            <tr valign="top">
              <td>PG+Coverage</td>
              <td>41.6</td>
              <td>24.8</td>
              <td>38.6</td>
            </tr>
            <tr valign="top">
              <td>N2MAG<sup>c</sup></td>
              <td>43.1</td>
              <td>27.0</td>
              <td>40.2</td>
            </tr>
            <tr valign="top">
              <td>N2MAG+Coverage</td>
              <td>45.2</td>
              <td>28.5</td>
              <td>41.8</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>ROUGE: Recall-Oriented Understudy for Gisting Evaluation.</p>
          </fn>
          <fn id="table1fn2">
            <p><sup>b</sup>PG: pointer-generator.</p>
          </fn>
          <fn id="table1fn3">
            <p><sup>c</sup>N2MAG: neural network model for medical assessment generation.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p><xref ref-type="table" rid="table2">Table 2</xref> shows the physicians’ evaluation results. The results show that N2MAG+Coverage outperformed PG+Coverage based on the overall quality of assessment. The results show that although both PG+Coverage and N2MAG+Coverage achieved better scores on ROUGE, their overall quality scores remained lower (average of 2.17 and 2.36, respectively). On the other hand, the evaluation scores of doctors were also low (average of 2.92). Our results are not surprising, as there is a wealth of literature that has shown low agreement among physicians. In addition, since physician evaluators were informed that all three outputs were generated by computer systems, bias against computer systems may lead to poor overall scores.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Results of two physicians’ evaluations.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="380"/>
          <col width="230"/>
          <col width="230"/>
          <col width="160"/>
          <thead>
            <tr valign="top">
              <td>Model</td>
              <td>Physician 1</td>
              <td>Physician 2</td>
              <td>Average</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Human</td>
              <td>3.14</td>
              <td>2.70</td>
              <td>2.92</td>
            </tr>
            <tr valign="top">
              <td>PG<sup>a</sup>+Coverage</td>
              <td>2.50</td>
              <td>1.84</td>
              <td>2.17</td>
            </tr>
            <tr valign="top">
              <td>N2MAG<sup>b</sup>+Coverage</td>
              <td>2.66</td>
              <td>2.06</td>
              <td>2.36</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>PG: pointer-generator.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>N2MAG: neural network model for medical assessment generation.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p>We analyzed the physicians’ evaluation results. We found that for 42 of 50 (84%) assessments, physician evaluators judged that N2MAG+Coverage outperformed PG+Coverage. In addition, for 18 of 50 (36%) assessments, physicians judged that N2MAG+Coverage outperformed or performed as well as the doctor who wrote the assessment of his/her patient.</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Error Analyses</title>
        <p>We also conducted error analyses. As described in the Results section, N2MAG+Coverage outperformed PG+Coverage 84% of the time. An example is illustrated in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>. In this example, all three assessments correctly identified the type of injury, which is a right small finger metacarpal fracture and that the wound was healing. However, only the doctor and N2MAG+Coverage identified the type of surgery the patient underwent, which is open reduction and percutaneous pinning of the fractured bone. The difference is crucial, as the interpretation from human and N2MAG+Coverage assessments would be correct (ie, the patient is recovering after undergoing surgical treatment for the fracture), while the PG+Coverage assessment would be incorrect (ie, the patient is recovering from the fracture [without treatment]). This example shows the importance of the attention-over-attention mechanism.</p>
        <boxed-text id="box2" position="float">
          <title>The generated assessments for the note in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The numbers in brackets are the two physicians' scores.</title>
          <p><bold>Physician:</bold> healing well status post open reduction and percutaneous pinning of right small finger metacarpal fracture. &#60;4,3&#62;</p>
          <p><bold> PG+Coverage:</bold> healing well status post right small finger metacarpal fracture, status post right small finger metacarpal fracture. &#60;3,3&#62;</p>
          <p><bold> N2MAG+Coverage:</bold>  healing status post open reduction and percutaneous pinning of right small finger metacarpal fracture. &#60;4,3&#62;
</p>
        </boxed-text>
        <p>Although the result of ROUGE and expert evaluation demonstrate the utility of our N2MAG models in generating accurate medical assessments, we found that the N2MAG models made a lot of mistakes, many of which were severe, including wrong diagnoses. An example is shown in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref>. The clinical narrative describes a patient’s current problem, which is urinary incontinence. The severity of the problem required the patient to use two diapers a day. The narrative also describes the prior treatment in addition to other medical conditions, surgical treatments, and current medications. Based on clinical knowledge, urinary tract infection can often be present with urinary incontinence. As such, the documented physical examination shows the clinician’s effort to look for findings suggestive of urinary tract infection. Based on the information provided, the patient has urinary incontinence but cannot fully rule out urinary tract infection because the patient has pain in her flank. Upon analysis of the three assessments, only the assessment generated by the doctor identified urinary incontinence. In contrast, PG+Coverage provided no information on the current status of the patient, while N2MAG+Coverage made a wrong diagnosis of benign prostate hyperplasia, a condition that is not seen in females, and ruled out urinary tract infection. We speculate that if we increase the training size that N2MAG is trained on, we may mitigate this kind of mistake.</p>
        <boxed-text id="box3" position="float">
          <title>The generated assessments for one electronic health record note. The numbers in brackets are two physicians' scores.</title>
          <p><bold>Header:</bold> patient is seen in consult at the request of dr. &#60;Last Name &#62;. chief complaint: urinary incontinence.</p>
          <p><bold>Subjective:</bold> the patient is an 87-year-old female, what she describes just total incontinence. she wears 2 depends a day. interestingly, there is no nocturia, frequency, dysuria or hematuria . she wakes up in the morning and her diaper is soaked. she did have collagen implants to the urethra back in the 1980s and they worked for a while, she says. past medical history: positive for atrial fibrillation, copd, congestive heart failure, diet-controlled diabetes, reflux, elevated lipids, hypertension, hypothyroidism and vitamin d deficiency. past surgical history: includes the contigen injections as noted, appendectomy, back surgery, right knee surgery, pacemaker placement and aortic valve replacement. tobacco use: none. ethanol use: none. social history: the patient is retired. family history: benign. allergies: amlodipine, lipitor, metformin, codeine, morphine, propoxyphene, tramadol and vicodin. medications: include aspirin, crestor, cyanocobalamin injections, furosemide, irbesartan, klor-con, levothyroid, meclizine, metoprolol, nasonex, nitroglycerin, ventolin inhaler and coumadin. </p>
          <p><bold>Objective:</bold> physical examination: back: shows cva tenderness. abdomen: benign.</p>
          <p><bold>Physician:</bold> urinary incontinence for a week, completely stress incontinence. there is no urgency. &#60;4,3&#62;</p>
          <p><bold>PG+Coverage:</bold> assessment: the patient is doing well she has a history of atrial fibrillation, congestive heart failure, congestive heart failure, congestive heart failure, congestive heart failure, coronary artery disease, congestive heart failure, coronary artery disease and coronary artery disease. &#60;1,1&#62;</p>
          <p><bold>N2MAG+Coverage:</bold> assessment: outlet obstruction secondary to bph, not requiring therapy, there is no evidence of urinary tract infection or urinary tract infection. &#60;1,2&#62;
</p>
        </boxed-text>
        <p>Our results show that physician evaluators provided low scores for doctors’ assessments, mainly due to inadequate coverage. For example, in the previous example, our two physician evaluators gave the doctors’ assessment scores of 4 and 3, because both considered that the doctor’s assessment was incomplete: The assessment only described one of the symptoms but failed to describe the possibility of urinary tract infection.</p>
        <p>As the world population is living longer, patients are increasingly having more complex diseases. At the same time, physicians are increasingly trained with specializations. We believe that N2MAG may be used as an efficient tool for clinical decision support.</p>
      </sec>
      <sec>
        <title>The Model Interpretation</title>
        <p>Interpretability or explainability is crucial for any clinical applications. However, interpretability is typically a well-known challenge for deep neural models. In contrast, our novel attention-over-attention mechanism architecture allows an excellent interpretability. For example, as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, by analyzing the attention weights for the Header section, when generating the word “healing,” the decoder mainly focuses on the words (green words) “postoperative visit status,” “right small finger,” and “neck” in the Header section. Therefore, these words summarize the main reason why patients visit the physician. Accordingly, the decoder is based on this information and extends to “postoperative visit status,” “right small finger,” and “neck,” from the Subjective and Objective sections. Based on the attention weights for the Subjective and Objective sections, the decoder is shown to mainly pay attention to the words (blue words) “very closely,” “well healed externally,” “metacarpal appears better aligned,” and “has exhibited bony healing.” From these words, we can see that the status of the patient is becoming better. By combining the aforementioned information, the decoder makes a decision to generate and output the word “healing” in the assessment.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Example for model interpretation.</p>
          </caption>
          <graphic xlink:href="medinform_v8i1e14971_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Conclusion and Future Direction</title>
        <p>In this paper, we proposed a novel neural model for EHR medical assessment generation (N2MAG). N2MAG takes on input as Subjective and Objective content and conditions of the chief complaint, and outputs Assessment in natural language. Our evaluation results show that N2MAG substantially outperformed other state-of-the-art machine learning models. In addition, a comparison between N2MAG and physician experts has shown that N2MAG performed equally or outperformed doctors in 36% assessments. As the medical domain has become more specialized, N2MAG has the potential to be used as a clinical decision support system by generating a medical assessment draft for physicians. N2MAG could highlight salient information, which may help physicians reduce the information overload burden and improve the efficiency. To improve N2MAG, we will increase the size of EHRs for training to mitigate data sparsity. We will also incorporate external knowledge resources such as clinical guidelines.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">N2MAG</term>
          <def>
            <p>the neural network model for medical assessment generation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">R-1</term>
          <def>
            <p>ROUGE-1</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">R-2</term>
          <def>
            <p>ROUGE-2</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">R-L</term>
          <def>
            <p>ROUGE-L</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ROUGE</term>
          <def>
            <p>Recall-Oriented Understudy for Gisting Evaluation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SOAP</term>
          <def>
            <p>Subjective, Objective, Assessment, and Plan</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by the National Heart, Lung, and Blood Institute of the National Institutes of Health under award number R01HL125089. HY is also supported by grants R01DA045816, R01HL137794, R01LM012817, and R01HL135219. The content is solely the responsibility of the authors and does not represent the views of the National Institutes of Health or the Department of Veterans Affairs. This work was completed when BH was working in UMass Lowell as a postdoc research associate. BH is currently working at the Harbin Institute of Technology, Shenzhen, as an assistant professor.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deliberato</surname>
              <given-names>RO</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>Clinical Note Creation, Binning, and Artificial Intelligence</article-title>
          <source>JMIR Med Inform</source>
          <year>2017</year>
          <month>08</month>
          <day>03</day>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>e24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2017/3/e24/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.7627</pub-id>
          <pub-id pub-id-type="medline">28778845</pub-id>
          <pub-id pub-id-type="pii">v5i3e24</pub-id>
          <pub-id pub-id-type="pmcid">PMC5561387</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shickel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tighe</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bihorac</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rashidi</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Deep EHR: A Survey of Recent Advances in Deep Learning Techniques for Electronic Health Record (EHR) Analysis</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2018</year>
          <month>9</month>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>1589</fpage>
          <lpage>1604</lpage>
          <pub-id pub-id-type="doi">10.1109/jbhi.2017.2767063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Searles</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Multi-layer Representation Learning for Medical Concepts</article-title>
          <year>2016</year>
          <month>08</month>
          <day>13</day>
          <conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 13-17, 2016</conf-date>
          <conf-loc>San Francisco, California</conf-loc>
          <fpage>1495</fpage>
          <lpage>1504</lpage>
          <pub-id pub-id-type="doi">10.1145/2939672.2939823</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Wei</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Caiyun</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Jie</given-names>
            </name>
          </person-group>
          <article-title>Multi-scale Convolutional Neural Networks for Lung Nodule Classification</article-title>
          <source>Inf Process Med Imaging</source>
          <year>2015</year>
          <volume>24</volume>
          <fpage>588</fpage>
          <lpage>99</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-19992-4_46</pub-id>
          <pub-id pub-id-type="medline">26221705</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weed</surname>
              <given-names>LL</given-names>
            </name>
          </person-group>
          <article-title>Medical Records That Guide and Teach</article-title>
          <source>N Engl J Med</source>
          <year>1968</year>
          <month>03</month>
          <day>14</day>
          <volume>278</volume>
          <issue>11</issue>
          <fpage>593</fpage>
          <lpage>600</lpage>
          <pub-id pub-id-type="doi">10.1056/nejm196803142781105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Cunningham</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>McManus</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Detection of Bleeding Events in Electronic Health Record Notes Using Convolutional Neural Network Models Enhanced With Recurrent Neural Network Autoencoders: Deep Learning Approach</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>02</month>
          <day>08</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>e10788</fpage>
          <pub-id pub-id-type="doi">10.2196/10788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arbabi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Fidler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Brudno</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Identifying Clinical Terms in Medical Text Using Ontology-Guided Machine Learning</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>05</month>
          <day>10</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12596</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12596/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12596</pub-id>
          <pub-id pub-id-type="medline">31094361</pub-id>
          <pub-id pub-id-type="pii">v7i2e12596</pub-id>
          <pub-id pub-id-type="pmcid">PMC6533869</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Fei</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>An investigation of single-domain and multidomain medication and adverse drug event relation extraction from electronic health record notes using advanced deep learning models</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>07</month>
          <day>01</day>
          <volume>26</volume>
          <issue>7</issue>
          <fpage>646</fpage>
          <lpage>654</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz018</pub-id>
          <pub-id pub-id-type="medline">30938761</pub-id>
          <pub-id pub-id-type="pii">5426087</pub-id>
          <pub-id pub-id-type="pmcid">PMC6562161</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yeh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Artificial Intelligence Learning Semantics via External Resources for Classifying Diagnosis Codes in Discharge Notes</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>11</month>
          <day>06</day>
          <volume>19</volume>
          <issue>11</issue>
          <fpage>e380</fpage>
          <pub-id pub-id-type="doi">10.2196/jmir.8344</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Malley</surname>
              <given-names>Kimberly J</given-names>
            </name>
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wildes</surname>
              <given-names>Kimberly Raiford</given-names>
            </name>
            <name name-style="western">
              <surname>Hurdle</surname>
              <given-names>John F</given-names>
            </name>
            <name name-style="western">
              <surname>Ashton</surname>
              <given-names>Carol M</given-names>
            </name>
          </person-group>
          <article-title>Measuring diagnoses: ICD code accuracy</article-title>
          <source>Health Serv Res</source>
          <year>2005</year>
          <month>10</month>
          <volume>40</volume>
          <issue>5 Pt 2</issue>
          <fpage>1620</fpage>
          <lpage>39</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/16178999"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1475-6773.2005.00444.x</pub-id>
          <pub-id pub-id-type="medline">16178999</pub-id>
          <pub-id pub-id-type="pii">HESR444</pub-id>
          <pub-id pub-id-type="pmcid">PMC1361216</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Baotian</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Qingcai</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Fangze</given-names>
            </name>
          </person-group>
          <article-title>LCSTS: A Large Scale Chinese Short Text Summarization Dataset</article-title>
          <source>Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2015</year>
          <month>09</month>
          <conf-name>The 2015 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>September 2015</conf-date>
          <conf-loc>Lisbon, Portugal</conf-loc>
          <fpage>1967</fpage>
          <lpage>1972</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/D15-1229</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bahdanau</surname>
              <given-names>Dzmitry</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>Kyunghyun</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Yoshua</given-names>
            </name>
          </person-group>
          <source>arXiv.org</source>
          <year>2014</year>
          <access-date>2019-12-24</access-date>
          <comment>Neural Machine Translation by Jointly Learning to Align and Translate <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1409.0473">http://arxiv.org/abs/1409.0473</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>Jiatao</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Zhengdong</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Hang</given-names>
            </name>
          </person-group>
          <article-title>Incorporating Copying Mechanism in Sequence-to-Sequence Learning</article-title>
          <source>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</source>
          <year>2016</year>
          <month>08</month>
          <conf-name>The 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <fpage>1631</fpage>
          <lpage>1640</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/P16-1154"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/P16-1154</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vinyals</surname>
              <given-names>Oriol</given-names>
            </name>
            <name name-style="western">
              <surname>Fortunato</surname>
              <given-names>Meire</given-names>
            </name>
            <name name-style="western">
              <surname>Jaitly</surname>
              <given-names>Navdeep</given-names>
            </name>
          </person-group>
          <article-title>Pointer Networks</article-title>
          <source>Proceedings of the 28th International Conference on Neural Information Processing Systems - Volume 2</source>
          <year>2015</year>
          <month>12</month>
          <day>07</day>
          <conf-name>The 28th International Conference on Neural Information Processing Systems - Volume 2</conf-name>
          <conf-date>December 07-12, 2015</conf-date>
          <conf-loc>Montreal, Canada</conf-loc>
          <fpage>2692</fpage>
          <lpage>2700</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://papers.nips.cc/paper/5866-pointer-networks.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>Zhaopeng</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Zhengdong</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Yang</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Xiaohua</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Hang</given-names>
            </name>
          </person-group>
          <article-title>Modeling Coverage for Neural Machine Translation</article-title>
          <source>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</source>
          <year>2016</year>
          <month>08</month>
          <conf-name>The 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <fpage>76</fpage>
          <lpage>85</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/P16-1008"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p16-1008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>See</surname>
              <given-names>Abigail</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Peter J</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>Christopher D</given-names>
            </name>
          </person-group>
          <article-title>Get To The Point: Summarization with Pointer-Generator Networks</article-title>
          <source>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</source>
          <year>2017</year>
          <month>07</month>
          <conf-name>The 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>July 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <fpage>1073</fpage>
          <lpage>1083</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1704.04368"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p17-1099</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>Ilya</given-names>
            </name>
            <name name-style="western">
              <surname>Vinyals</surname>
              <given-names>Oriol</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Quoc V</given-names>
            </name>
          </person-group>
          <article-title>Sequence to Sequence Learning with Neural Networks</article-title>
          <source>Proceedings of the 27th International Conference on Neural Information Processing Systems - Volume 2</source>
          <year>2014</year>
          <month>12</month>
          <day>08</day>
          <conf-name>The 27th International Conference on Neural Information Processing Systems - Volume 2</conf-name>
          <conf-date>December 08-13, 2014</conf-date>
          <conf-loc>Montreal, Canada</conf-loc>
          <fpage>3104</fpage>
          <lpage>3112</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Shiqi</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Yong</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Zhongjun</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Wei</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Hua</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Maosong</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Yang</given-names>
            </name>
          </person-group>
          <article-title>Minimum Risk Training for Neural Machine Translation</article-title>
          <source>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2016</year>
          <month>08</month>
          <conf-name>The 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <fpage>1683</fpage>
          <lpage>1692</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/P16-1159"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p16-1159</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Boxing</given-names>
            </name>
            <name name-style="western">
              <surname>Cherry</surname>
              <given-names>Colin</given-names>
            </name>
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>George</given-names>
            </name>
            <name name-style="western">
              <surname>Larkin</surname>
              <given-names>Samuel</given-names>
            </name>
          </person-group>
          <article-title>Cost Weighting for Neural Machine Translation Domain Adaptation</article-title>
          <source>Proceedings of the First Workshop on Neural Machine Translation</source>
          <year>2017</year>
          <month>08</month>
          <conf-name>The First Workshop on Neural Machine Translation</conf-name>
          <conf-date>2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <fpage>40</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://aclweb.org/anthology/W17-3205"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/w17-3205</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Duchi</surname>
              <given-names>John</given-names>
            </name>
            <name name-style="western">
              <surname>Hazan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Adaptive Subgradient Methods for Online Learning and Stochastic Optimization</article-title>
          <source>The Journal of Machine Learning Research</source>
          <year>2011</year>
          <month>02</month>
          <day>01</day>
          <volume>12</volume>
          <fpage>2121</fpage>
          <lpage>2159</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caruana</surname>
              <given-names>Rich</given-names>
            </name>
            <name name-style="western">
              <surname>Lawrence</surname>
              <given-names>Steve</given-names>
            </name>
            <name name-style="western">
              <surname>Giles</surname>
              <given-names>C Lee</given-names>
            </name>
          </person-group>
          <article-title>Overfitting in Neural Nets: Backpropagation, Conjugate Gradient, and Early Stopping</article-title>
          <year>2000</year>
          <conf-name>The 13th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>2000</conf-date>
          <conf-loc>Denver, CO</conf-loc>
          <fpage>381</fpage>
          <lpage>387</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Chin-Yew</given-names>
            </name>
          </person-group>
          <article-title>ROUGE: A Package for Automatic Evaluation of Summaries</article-title>
          <source>Proceedings of the ACL-04 Workshop</source>
          <year>2004</year>
          <conf-name>The ACL-04 Workshop</conf-name>
          <conf-date>2004</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>74</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/W04-1013"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
