<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i5e24803</article-id>
      <article-id pub-id-type="pmid">33820755</article-id>
      <article-id pub-id-type="doi">10.2196/24803</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>An Attention Model With Transfer Embeddings to Classify Pneumonia-Related Bilingual Imaging Reports: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sheikhalishahi</surname>
            <given-names>Seyedmostafa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Aczon</surname>
            <given-names>Melissa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Abeysinghe</surname>
            <given-names>Rashmie</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wang</surname>
            <given-names>Xianglong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Park</surname>
            <given-names>Hyung</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5523-5144</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Min</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3255-1600</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Eun Byul</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4859-4480</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Seo</surname>
            <given-names>Bo Kyung</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4788-6748</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Choi</surname>
            <given-names>Chang Min</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Pulmonary and Critical Care Medicine</institution>
            <institution>Asan Medical Center</institution>
            <addr-line>Olympic-ro 43-gil</addr-line>
            <addr-line>Seoul, 05505</addr-line>
            <country>Republic of Korea</country>
            <fax>82 2 3010 6968</fax>
            <phone>82 2 3010 5902</phone>
            <email>ccm9607@gmail.com</email>
          </address>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2881-4669</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Pulmonary and Critical Care Medicine</institution>
        <institution>Asan Medical Center</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Yonsei University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Oncology</institution>
        <institution>Asan Medical Center</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Chang Min Choi <email>ccm9607@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>17</day>
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>5</issue>
      <elocation-id>e24803</elocation-id>
      <history>
        <date date-type="received">
          <day>6</day>
          <month>10</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>28</day>
          <month>10</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>21</day>
          <month>12</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>4</day>
          <month>4</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Hyung Park, Min Song, Eun Byul Lee, Bo Kyung Seo, Chang Min Choi. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 17.05.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/5/e24803" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In the analysis of electronic health records, proper labeling of outcomes is mandatory. To obtain proper information from radiologic reports, several studies were conducted to classify radiologic reports using deep learning. However, the classification of pneumonia in bilingual radiologic reports has not been conducted previously.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this research was to classify radiologic reports into pneumonia or no pneumonia using a deep learning method.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A data set of radiology reports for chest computed tomography and chest x-rays of surgical patients from January 2008 to January 2018 in the Asan Medical Center in Korea was retrospectively analyzed. The classification performance of our long short-term memory (LSTM)–Attention model was compared with various deep learning and machine learning methods. The area under the receiver operating characteristic curve (AUROC), area under the precision-recall curve, sensitivity, specificity, accuracy, and F1 score for the models were compared.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 5450 radiologic reports were included that contained at least one pneumonia-related word. In the test set (n=1090), our proposed model showed 91.01% (992/1090) accuracy (AUROCs for negative, positive, and obscure were 0.98, 0.97, and 0.90, respectively). The top 3 performances of the models were based on FastText or LSTM. The convolutional neural network–based model showed a lower accuracy 73.03% (796/1090) than the other 2 algorithms. The classification of negative results had an F1 score of 0.96, whereas the classification of positive and uncertain results showed a lower performance (positive F1 score 0.83; uncertain F1 score 0.62). In the extra-validation set, our model showed 80.0% (642/803) accuracy (AUROCs for negative, positive, and obscure were 0.92, 0.96, and 0.84, respectively).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our method showed excellent performance in classifying pneumonia in bilingual radiologic reports. The method could enrich the research on pneumonia by obtaining exact outcomes from electronic health data.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>deep learning</kwd>
        <kwd>natural language process</kwd>
        <kwd>attention</kwd>
        <kwd>clinical data</kwd>
        <kwd>pneumonia</kwd>
        <kwd>classification</kwd>
        <kwd>medical imaging</kwd>
        <kwd>electronic health record</kwd>
        <kwd>machine learning</kwd>
        <kwd>model</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Electronic health records (EHRs) have become increasingly incorporated into clinical practices in hospitals over the past few decades [<xref ref-type="bibr" rid="ref1">1</xref>]. EHR data are voluminous and can be used as real-world evidence if they are analyzed with proper methods [<xref ref-type="bibr" rid="ref2">2</xref>]. However, the data are not collected for research purposes [<xref ref-type="bibr" rid="ref2">2</xref>], and several rule-based methods are used to extract particular outcomes from the data set. There have been numerous studies where analyses were performed using EHR data with labels such as <italic>sepsis</italic> defined by rule-based outcomes [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. However, defining outcomes other than laboratory findings is difficult because the data are unstructured and written as natural language. For this reason, a previous study that used the outcome <italic>pneumonia</italic> defined pneumonia by its International Classification of Diseases, Ninth Revision, Clinical Modification (ICD-9-CM) code [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. However, the use of ICD codes as a label does not contain temporal information, such as the exact time of diagnosis during hospital admission, and it is hard to perform time series analysis with this limited information.</p>
      <p>Although medical imaging reports contain a great deal of information regarding diagnosis and clinical features, it is hard to analyze the information because they are formatted as unstructured free text and are variably written depending on the radiologist [<xref ref-type="bibr" rid="ref9">9</xref>]. For this reason, medical imaging reports are rarely used as outcomes in big data analysis [<xref ref-type="bibr" rid="ref10">10</xref>]. However, as long as pneumonia can be identified in radiologic reports, other important information, such as the time of onset and the presence of pneumonia during admission, can also be derived. Moreover, labeled data are essential in deep learning because the analysis requires millions of observations to reach acceptable performance levels [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      <p>As of 2018, 43 studies using natural language processing for the identification of chronic diseases in EHRs had been published, and only recently have there been more studies conducted on this topic using deep learning [<xref ref-type="bibr" rid="ref12">12</xref>]. Especially in deep learning, convolutional neural network (CNN)–based models have shown significant accuracy in extracting pulmonary embolism [<xref ref-type="bibr" rid="ref10">10</xref>] and pulmonary infection from medical reports [<xref ref-type="bibr" rid="ref1">1</xref>]. The model can be used to classify diagnosis from whole medical records even when they are written in the Chinese language [<xref ref-type="bibr" rid="ref13">13</xref>], and a recurrent neural network–based model has been used for classifying stroke and identifying its location [<xref ref-type="bibr" rid="ref14">14</xref>]. However, the use of bilingual clinical reports is common for EHRs in non–English-speaking countries.</p>
      <p>The purpose of our study was to classify reports of pneumonia consisting of findings derived during the pre- and postoperative period of a major surgery that were written as bilingual texts (English and Korean). We compared the performance of traditional models with deep learning models, with the latter showing excellent performance in previous studies, and identified the best-performing model as an attention-based bidirectional long short-term memory (Bi-LSTM) neural network.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Clinical Data</title>
        <p>We retrospectively included radiology reports for chest computed tomography (CT) and chest x-rays of surgical patients from January 2008 to January 2018 in the Asan Medical Center in Korea. The patients had undergone upper abdominal and thoracic surgeries, as coded by the ICD-9-CM. Detailed criteria for the surgery are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>The radiology reports consist of chest CT and chest x-rays (posteroanterior and anteroposterior) that are extracted by radiology procedure codes. The chest x-ray reports have no structured format and only contain descriptions. The chest CT reports consist of the short history of the patients, the findings, and a conclusion; however, the format varies depending on the writing style of the radiologist. The conclusions in around half of the chest CT reports were omitted due to the different writing style of the radiologists. Therefore, we used only the findings of chest CT and the descriptions of chest x-rays to classify the labels, and all the annotation was based solely on the description of each report.</p>
        <p>Usually, the pneumonia incidence in surgical patients is around 1%, suggesting that reports of pneumonia are rare. To overcome the imbalance of the positive and negative data sets, we only included radiologic reports that contained pneumonia-related words. The words representing pneumonia were as follows: “pneumoni-,” “consolid-,” “infiltra-,” “bronchiole-,” “hazi-,” “hazzi-,” “opacit-,” and “GGO”.</p>
        <p>From a total of 1,088,680 radiology reports, 886,248 were included after reports with inappropriate surgical procedures were excluded. The detailed inclusion criteria of the appropriate procedures have been described in a previous study [<xref ref-type="bibr" rid="ref3">3</xref>]. After extracting the pneumonia-related words, 23,377 reports were included.</p>
      </sec>
      <sec>
        <title>Report Annotation</title>
        <p>Among the 23,377 reports, a total of 5450 annotated reports were used to train our model. A clinician annotated the 5450 reports and used them for training and validation. After training the model, 2 different clinicians, who worked independently from the first clinician, annotated another 1000 reports for an extra-validation set (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <p>All document-level annotations by clinicians included 3 categories for pneumonia: negative, positive, and unclear (obscure). The positive pneumonia reports included postoperative infection reports and did not contain reports for noninfectious diseases, such as organizing pneumonia or interstitial lung disease, because the label was required to represent pneumonia as a perioperative complication. The excluded reports were labeled as negative reports. It was observed that 895 reports were pneumonia positive, 4005 reports were pneumonia negative, and 550 reports were obscure results. In the extra-validation set, 2 clinicians independently labeled the radiologic reports on the basis of the clinical importance of the findings. To overcome the human error of the 2 clinicians, the consensus label of the 2 clinicians was regarded as the reference standard. Interrater reliability (κ score) was calculated using the Cohen κ value.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Radiologic reports flowchart.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e24803_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>This study was approved by the ethics committee of the Asan Medical Center (approval no. 2018-1122), and the need to obtain informed consent was waived because of the retrospective observational nature of the study. The clinical data that were extracted using the Asan Biomedical Research Environment system were indexed by deidentified encrypted patient ID numbers so that the researchers would not be able to identify the patients [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      </sec>
      <sec>
        <title>Proposed Approach</title>
        <p>As most of the verbs and adjectives in clinical reports are written in Korean, and most of the nouns (usually the names of the diseases) are written in English, we had to consider 2 different languages. Therefore, we proposed a new method for a bilingual clinical data set based on the classification algorithm of combining substring and translation embeddings (Kor2Eng) with an attention-based Bi-LSTM neural network (LSTM-Attention). <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> Figure S4 shows the architecture of our proposed model.</p>
        <p>The proposed method includes 3 steps: (1) text preprocessing; (2) word representation, which is composed of substring and Korean-to-English (Kor2Eng) embeddings; and (3) training of the classification model.</p>
        <p>Our data set, which is a description of x-ray and CT, is composed of a mix of Korean and English sentences. Therefore, specific preprocessing is required before the statements are fed into the classification model. The detailed methods for text preprocessing and training are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>Kor2Eng Transfer Embedding</title>
        <p>Training word vectors require a considerable amount of data and time. Therefore, we applied embeddings by training them independently on monolingual data and pretraining them with Wikipedia data. However, due to the characteristics of data, the text of the clinical notes was a mixture of English and Korean. If a monolingual embedding were to be used for this data, one side of the information would be lost. To reduce the loss of information, we used a translation method that converts the vector of Korean words into the vector of English words with similar meanings. The unsupervised method of translating the source language into the target language was proposed by Lample et al [<xref ref-type="bibr" rid="ref17">17</xref>]. In this method, the process of learning a mapping occurs between the 2 sets of embedding in the shared space. We trained the subword embedding model to learn Korean-to-English mapping using the unsupervised method without any parallel data.</p>
      </sec>
      <sec>
        <title>Deep Learning–Based Classification Model</title>
        <p>We built an attention-based deep neural network using LSTM. LSTM is a recurrent neural network variant that alleviates the vanishing gradient problem by learning and remembering long-term dependencies [<xref ref-type="bibr" rid="ref18">18</xref>] and consists of a cell memory state and 3 gates.</p>
        <p>The Bi-LSTM consists of a forward–backward LSTM layer [<xref ref-type="bibr" rid="ref19">19</xref>]. Both layers are connected to the same output layer. Our classification model used Bi-LSTM with the attention mechanism. This allowed the model to simultaneously handle information from different positions.</p>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> shows the architecture of the deep learning–based classification model. First, the input is fed into the Bi-LSTM layer. Second, the output of the Bi-LSTM layer is fed into the attention layer (Bi-LSTM–Attention) for attending important words. Finally, the output of the attention weight passes through the softmax layer for classification.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The architecture of the deep learning–based classification model. Each input receives an embedding of English translated from Korean. In the attention layer, each word has an attention weight that is translated into its importance for the prediction. Bi-LSTM: bidirectional long short-term memory model.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e24803_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The performance metrics (ie, precision, recall [sensitivity], and <italic>F</italic><sub>1</sub> score) were used to evaluate the models. The accuracy, area under the receiver operating characteristic curve (AUROC), and area under the precision-recall curve (AUPRC) were used to compare the models. For analyzing the multilabel data set, labels were treated as interested labels and other labels in evaluating each metric. For example, when we treated the precision for negative labels, only the true negative data were treated as true labels while positive and obscure labels were treated as false labels. <italic>F</italic><sub>1</sub> score is the weighted average of precision and recall, and it is used to measure the performance of a model when the data consist of uneven class distributions [<xref ref-type="bibr" rid="ref20">20</xref>]. The statistical analysis was performed on Python 3.7.6 (Python Software Foundation).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>In this section, we evaluated the performance of the various classification models. To demonstrate the performance of our method, we compared the proposed model with traditional machine learning and other deep learning models. The machine learning models included logistic regression [<xref ref-type="bibr" rid="ref21">21</xref>], support vector machine [<xref ref-type="bibr" rid="ref22">22</xref>], Naïve Bayes regression [<xref ref-type="bibr" rid="ref23">23</xref>], K-nearest neighbors algorithm [<xref ref-type="bibr" rid="ref24">24</xref>], decision tree [<xref ref-type="bibr" rid="ref25">25</xref>], and random forest [<xref ref-type="bibr" rid="ref26">26</xref>]. The deep learning models included the word-to-vector representation model (Word2Vec) [<xref ref-type="bibr" rid="ref27">27</xref>], FastText [<xref ref-type="bibr" rid="ref17">17</xref>], CNN [<xref ref-type="bibr" rid="ref28">28</xref>], and LSTM [<xref ref-type="bibr" rid="ref29">29</xref>]. The details of each model are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      <p>Out of 5450 data sets, 4005 did not contain pneumonia, 895 contained pneumonia, and 550 were obscure, with 80% being used in the training set and the remaining 20% in the test set. The test set was composed of no pneumonia (n=801), pneumonia (n=179), and obscure (n=110) classifications. The extra-validation set was annotated by 2 independent clinicians. Out of a total of 1000 radiologic reports, 803 labels were agreed upon by 2 independent clinicians. Among these labels, 498 did not contain pneumonia, 185 contained pneumonia, and 120 were obscure cases.</p>
      <sec>
        <title>Accuracy of Our Model as Compared to Previous Models</title>
        <p>We evaluated the performance of the different models to find the best model. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, the prediction accuracy changed depending on the model. The traditional models (ie, support vector machine, Naïve Bayes, etc) achieved an accuracy between 64.03% and 83.03%. The logistic regression showed a reasonable performance with an accuracy of 83.03% (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> Table S1).</p>
        <p>The deep learning–based methods (ie, FastText, Word2Vec with Bi-LSTM–Attention, and the proposed model) outperformed the traditional models. The prediction accuracy of the deep learning models was 90.00%, 88.99%, and 91.01% for FastText, Word2Vec with Bi-LSTM–Attention, and the proposed model, respectively. These deep learning models showed a 10% higher accuracy than did the traditional machine learning methods because sentence classification required the interpretation of complex features. The proposed model achieved the highest performance compared to the other deep learning models (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> Table S1).</p>
      </sec>
      <sec>
        <title>Model Accuracy Based on the Different Representation Methods of Words</title>
        <p>We evaluated the performance based on different methods of word representation. The Word2Vec with Bi-LSTM–Attention model is a more commonly used language representation model. The model showed a higher accuracy and <italic>F</italic><sub>1</sub> score than did the traditional models; however, the drawback associated with this model is that the foreign language is not represented (<xref ref-type="table" rid="table1">Table 1</xref>). We implemented another representation method with a substring using the FastText model. This method involves slicing of words to bunches of characters, which can be a better expression for the foreign language. The substring with FastText model achieved a precision of 93% for negative, 84% for positive, and 74% for obscure classifications; and a recall of 93% for negative, 84% for positive, and 47% for obscure classifications. The substring with FastText model showed a better performance than did the Word2Vec model according to <italic>F</italic><sub>1</sub> score.</p>
        <p>Our proposed model (Kor2Eng) translated Korean to English before the prediction process. The proposed model achieved a precision of 96%, 86%, and 61%, and a recall of 97%, 80%, and 64% for negative, positive, and obscure classifications, respectively. The AUROC of the model was 0.98 for negative, 0.97 for positive, and 0.90 for obscure classifications, while the AUPRC was 0.99 for negative, 0.87 for positive, and 0.62 for obscure classifications (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> Figure S5). Compared to the classification of the negative labels, which was a relatively easy task (96% of negative), classifying positive or obscure labels was a harder task and showed a rather lower <italic>F</italic><sub>1</sub> score (83% for positive and 62% for obscure). For classifying the obscure classification, our model showed the highest performance among different representation methods (substring with FastText, Word2Vec, and Kor2Eng).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The detailed performance of the top 3 best-performing models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Models</td>
                <td colspan="2">Precision, n/N (%) </td>
                <td colspan="3">Recall, n/N (%) </td>
                <td colspan="3"><italic>F</italic><sub>1</sub> score (%) </td>
                <td colspan="2">AUROC<sup>a</sup></td>
                <td>AUPRC<sup>b</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="14"><bold>Substring+FastText</bold> [<xref ref-type="bibr" rid="ref17">17</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Negative</td>
                <td colspan="4">776/819 (94.7)</td>
                <td colspan="3">776/801 (96.9)</td>
                <td colspan="2">96</td>
                <td colspan="2">0.82</td>
                <td>0.92</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Positive</td>
                <td colspan="4">153/593 (25.8)</td>
                <td colspan="3">153/179 (85.5)</td>
                <td colspan="2">83</td>
                <td colspan="2">0.74</td>
                <td>0.34</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Obscure</td>
                <td colspan="4">52/73 (71.2)</td>
                <td colspan="3">52/110 (47.3)</td>
                <td colspan="2">57</td>
                <td colspan="2">0.71</td>
                <td>0.22</td>
              </tr>
              <tr valign="top">
                <td colspan="14">
                  <bold>Word2Vec<sup>c</sup>+Bi-LSTM<sup>d</sup>–Attention</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Negative</td>
                <td colspan="2">772/849 (90.9)</td>
                <td colspan="3">772/801 (96.4)</td>
                <td colspan="3">94</td>
                <td colspan="2">0.95</td>
                <td colspan="2">0.98</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Positive</td>
                <td colspan="2">153/222 (68.9)</td>
                <td colspan="3">153/179 (85.5)</td>
                <td colspan="3">81</td>
                <td colspan="2">0.96</td>
                <td colspan="2">0.87</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Obscure</td>
                <td colspan="2">47/80 (58.8)</td>
                <td colspan="3">47/110 (42.7)</td>
                <td colspan="3">49</td>
                <td colspan="2">0.88</td>
                <td colspan="2">0.51</td>
              </tr>
              <tr valign="top">
                <td colspan="14">
                  <bold>Proposed model (Kor2Eng<sup>e</sup>)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Negative</td>
                <td colspan="2">776/809 (95.9)</td>
                <td colspan="3">776/801 (96.9)</td>
                <td colspan="3">96</td>
                <td colspan="2">0.98</td>
                <td colspan="2">0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Positive</td>
                <td colspan="2">153/182 (84.1)</td>
                <td colspan="3">153/179 (85.5)</td>
                <td colspan="3">83</td>
                <td colspan="2">0.97</td>
                <td colspan="2">0.87</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Obscure</td>
                <td colspan="2">70/115 (60.9)</td>
                <td colspan="3">70/110 (63.6)</td>
                <td colspan="3">62</td>
                <td colspan="2">0.90</td>
                <td colspan="2">0.62</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>AUPRC: area under the precision-recall curve.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Word2Vec: the word-to-vector representation model.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>Bi-LSTM: bidirectional long short-term memory model.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>Kor2Eng: Korean to English.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Visualization of Relative Importance</title>
        <p>We visualized the weighted words when the proposed model classified the input data. In the attention model, the weight of each word could be used for classifying the reports. Based on the intensity of color, the importance of a word was indicated when the proposed model determined the class of the input data. Darker colors indicated a higher importance for classifying pneumonia. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the instances where the proposed model predicted pneumonia reports correctly. For example, the highlighted words “Peribronchial,” “infiltration,” “suspected,” and “bronchopneumonia” indicate pneumonia (<xref rid="figure3" ref-type="fig">Figure 3</xref>a). In the bilingual texts (<xref rid="figure3" ref-type="fig">Figure 3</xref>f), the following words are important for classifying pneumonia reports: “두드러져,” “bronchopneumonia,” “aspiration,” and “pneumonia.”</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Visualization of the importance of words by attention weights. The darker the color is, the greater the importance of the words for predicting the pneumonia label. High attention weight is depicted in the darker color. Words with high attention weights are shown.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e24803_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Extra Validations</title>
        <p>As an extra validation of our proposed model, 2 clinicians labeled an additional data set. The data set was randomly selected from the entire data set, excluding the previously trained data. For precise labeling, 2 medical doctors each labeled the records. Of the 1000 records, 803 were agreed upon by 2 independent physicians. The Cohen κ value of the clinicians’ label was 0.63 (95% CI 0.59-0.67). <xref ref-type="table" rid="table2">Table 2</xref> shows the performance results of the proposed model with the extra-validation data set. The AUROC and AUPRC for positive labels were slightly lower in the extra-validation set than in the test set (<xref rid="figure4" ref-type="fig">Figure 4</xref>). The <italic>F</italic><sub>1</sub> score of positive labels was similar to that of the training data; however, predicting negative and obscure labels showed a relatively poor performance as compared to the training data set according to <italic>F</italic><sub>1</sub> score. The overall accuracy of our model was 80.0%.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Extra validation of the proposed Korean-to-English (Kor2Eng) model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="bottom">
                <td>Class</td>
                <td>Precision, n/N (%)</td>
                <td>Recall, n/N (%)</td>
                <td><italic>F</italic><sub>1</sub> score (%)</td>
                <td>AUROC<sup>a</sup></td>
                <td>AUPRC<sup>b</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Negative</td>
                <td>422/470 (89.8)</td>
                <td>422/498 (84.7)</td>
                <td>87</td>
                <td>0.92</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>Positive</td>
                <td>142/155 (91.6)</td>
                <td>142/185 (76.8)</td>
                <td>84</td>
                <td>0.96</td>
                <td>0.91</td>
              </tr>
              <tr valign="top">
                <td>Obscure</td>
                <td>77/178 (43.3)</td>
                <td>77/120 (64.2)</td>
                <td>52</td>
                <td>0.84</td>
                <td>0.42</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>AUPRC: area under the precision-recall curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>AUROC and AUPRC of our proposed model in the extra-validation set. AUROC: area under the receiver operating characteristic curve; AUPRC: area under the precision-recall curve.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e24803_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>The purpose of the Kor2Eng model is to classify pneumonia-related medical records written in Korean and English. Our proposed model showed 91.01% accuracy in the test set and 80.0% accuracy in the extra-validation set for classifying pneumonia reports. Appropriate classification of radiologic reports is mandatory for further analysis regarding pneumonia through EMRs. As compared to other models, such as CNN or traditional machine learning models, our model showed better performance. The 3 best-performing models (Word2Vec with Bi-LSTM–Attention, FastText, and the proposed model) demonstrated better performance than did the traditional and CNN models, and our proposed model provided the highest AUROC and AUPRC among the top 3 models. Because too many false positives may lead to clinician exhaustion, a model with excellent performance is desirable. We consider that a model with an AUROC of at least 0.95 can be used in clinical practice or for labeling the data set. The false-positive results of pneumonia reports can be additionally filtered with other clinical findings such as respiratory symptoms or antibiotics use, as pneumonia is defined by respiratory symptoms with radiologic findings [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      <p>The label balance of the data set was a consequence of excluding irrelevant labels to our target. As the reports that do not have pneumonia-related words can be considered pneumonia-negative radiologic reports, the reports requiring classification must contain at least one of the pneumonia-related words such as “consolidation” or “haziness”. Excluding the irrelevant label is clinically appropriate and balances the data set with each label, with the balanced data set mitigating the overestimation of the model. Furthermore, filtering radiologic reports containing relevant words might make the data set rather homogenous, which makes classification a hard task. Our model showed an excellent performance in classifying pneumonia, and thus, it can be used for auto-labeling in classifying pneumonia reports.</p>
      <p>A notable observation is the discrepancy between the test and extra-validation set. The model showed a rather similar performance in classifying negative and positive cases and a relatively poor performance in obscure cases. One reason for this discrepancy might be that 2 different clinicians annotated the entire extra-validation set. As some of the obscure cases are classified by the nuance of the context, the 2 clinicians might have differed in labeling the obscure cases. Therefore, the labeling of the obscure classification in the extra-validation set might have been different from that of the training set. The pneumonia cases in the report should only be decided by clinical situations, and thus, the importance of obscure cases should be evaluated in subsequent studies.</p>
      <p>Several studies have been conducted for classifying radiologic reports as positive or negative for a given disease [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>] or for classifying various diagnoses from medical records written in Chinese [<xref ref-type="bibr" rid="ref13">13</xref>]. Most of the studies used a CNN-based model and showed a better performance than did our model [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. In our study, we compared several deep learning models from logistic regression to LSTM with attention. The CNN model, which showed an excellent performance in previous studies [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>], was inferior to the attention-based LSTM model in our data set. The reason for its relatively poor performance might be explained by our data selection. We selected radiologic reports that had at least one of the pneumonia-related words. This selection made the radiologic reports relatively homogeneous compared to those used in previous studies, which might contain a wider variety of radiologic reports. As we compared the performance with the CNN model, our proposed model was found to be comparably accurate with those of previous studies and showed better performance.</p>
      <p>Radiologic reports in this study consisted of 2 languages: English and Korean. Compared to English data sets, Korean-language data sets have been studied far less with respect to embedding and analysis in deep learning. To overcome this limitation, we used unsupervised translation of Korean words to English words, which had pretrained embedding [<xref ref-type="bibr" rid="ref17">17</xref>]. Compared to the Word2Vec with Bi-LSTM–Attention model, the attention/LSTM model with transfer embedding showed a better performance in classification, especially for obscure labels. This method might be especially important in bilingual reports.</p>
      <p>Our study has several limitations. First, we only included reports from a single tertiary center of surgical in-patients. Our model might be inaccurate in a reporting style different from the one that we have incorporated. Thus, if the model used a data set from another reporting style, the model would need to be validated again. However, in this case, more labeled data might be available, and thus the applied method would show better performance in another data set, especially for bilingual text reports. Second, we could not compare the exact same models with the previous models that showed good performance. However, we compared our model with various deep learning models that were used in previous studies, which is sufficient to compare the performance of different model structures.</p>
      <p>In summary, our proposed model showed superior performance as compared to other algorithms in the classification of pneumonia from radiologic reports. In bilingual radiologic reports, the proposed method of transfer embeddings with the Bi-LSTM–Attention model showed a significant improvement in performance over the previous high-performing models. We hope that this method could be used to enrich the research about pneumonia by obtaining exact outcomes from electronic health data.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary method and figures.</p>
        <media xlink:href="medinform_v9i5e24803_app1.docx" xlink:title="DOCX File , 603 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUPRC</term>
          <def>
            <p>area under the precision-recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">Bi-LSTM</term>
          <def>
            <p>bidirectional long short-term memory model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CT</term>
          <def>
            <p>computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ICD-9-CM</term>
          <def>
            <p>International Classification of Diseases, Ninth Revision, Clinical Modification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">Kor2Eng</term>
          <def>
            <p>Korean to English</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LSTM</term>
          <def>
            <p>long short-term memory model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">Word2Vec</term>
          <def>
            <p>word-to-vector representation model</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the Bio-Synergy Research Project (no. NRF-2013M3A9C4078138) of the Ministry of Science, Information and Communications Technology, and Future Planning through the National Research Foundation.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>HJP and CMC contributed to the conception and design of the study, as well as the data acquisition. HJP, BYS, EBL, and MS contributed to the analysis and interpretation of the data. HJP, BYS, EBL, and MS drafted the manuscript. HJP, CMC, and MS contributed to the critical revision of the paper, and all authors gave final approval for publication.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kehl</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Elmarakeby</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nishino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van Allen</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Lepisto</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Hassett</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Schrag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Assessment of deep natural language processing in ascertaining oncologic outcomes from radiology reports</article-title>
          <source>JAMA Oncol</source>
          <year>2019</year>
          <month>10</month>
          <day>01</day>
          <volume>5</volume>
          <issue>10</issue>
          <fpage>1421</fpage>
          <pub-id pub-id-type="doi">10.1001/jamaoncol.2019.1800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sherman</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Dal</surname>
              <given-names>PGJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>GW</given-names>
            </name>
            <name name-style="western">
              <surname>Gross</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hunter</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>LaVange</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Marinac-Dabic</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Robb</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Shuren</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Temple</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Woodcock</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yue</surname>
              <given-names>LQ</given-names>
            </name>
            <name name-style="western">
              <surname>Califf</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Real-world evidence - What is it and what can it tell us?</article-title>
          <source>N Engl J Med</source>
          <year>2016</year>
          <month>12</month>
          <day>08</day>
          <volume>375</volume>
          <issue>23</issue>
          <fpage>2293</fpage>
          <lpage>2297</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMsb1609216</pub-id>
          <pub-id pub-id-type="medline">27959688</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Hyung Jun</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>Dae Yon</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Wonjun</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Chang-Min</given-names>
            </name>
          </person-group>
          <article-title>Detection of bacteremia in surgical in-patients using recurrent neural network based on time series records: development and validation study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>08</month>
          <day>04</day>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>e19512</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/8/e19512/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19512</pub-id>
          <pub-id pub-id-type="medline">32669261</pub-id>
          <pub-id pub-id-type="pii">v22i8e19512</pub-id>
          <pub-id pub-id-type="pmcid">PMC7435626</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Desautels</surname>
              <given-names>Thomas</given-names>
            </name>
            <name name-style="western">
              <surname>Calvert</surname>
              <given-names>Jacob</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>Jana</given-names>
            </name>
            <name name-style="western">
              <surname>Jay</surname>
              <given-names>Melissa</given-names>
            </name>
            <name name-style="western">
              <surname>Kerem</surname>
              <given-names>Yaniv</given-names>
            </name>
            <name name-style="western">
              <surname>Shieh</surname>
              <given-names>Lisa</given-names>
            </name>
            <name name-style="western">
              <surname>Shimabukuro</surname>
              <given-names>David</given-names>
            </name>
            <name name-style="western">
              <surname>Chettipally</surname>
              <given-names>Uli</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>Mitchell D</given-names>
            </name>
            <name name-style="western">
              <surname>Barton</surname>
              <given-names>Chris</given-names>
            </name>
            <name name-style="western">
              <surname>Wales</surname>
              <given-names>David J</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>Ritankar</given-names>
            </name>
          </person-group>
          <article-title>Prediction of sepsis in the intensive care unit with minimal electronic health record data: a machine learning approach</article-title>
          <source>JMIR Med Inform</source>
          <year>2016</year>
          <month>09</month>
          <day>30</day>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>e28</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2016/3/e28/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.5909</pub-id>
          <pub-id pub-id-type="medline">27694098</pub-id>
          <pub-id pub-id-type="pii">v4i3e28</pub-id>
          <pub-id pub-id-type="pmcid">PMC5065680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saqib</surname>
              <given-names>Mohammed</given-names>
            </name>
            <name name-style="western">
              <surname>Sha</surname>
              <given-names>Ying</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>May D</given-names>
            </name>
          </person-group>
          <article-title>Early prediction of sepsis in EMR records using traditional ML techniques and deep learning LSTM networks</article-title>
          <source>Annu Int Conf IEEE Eng Med Biol Soc</source>
          <year>2018</year>
          <month>07</month>
          <day>04</day>
          <volume>2018</volume>
          <issue>8</issue>
          <fpage>4038</fpage>
          <lpage>4041</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/8/e19512/"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/EMBC.2018.8513254</pub-id>
          <pub-id pub-id-type="medline">30441243</pub-id>
          <pub-id pub-id-type="pii">v22i8e19512</pub-id>
          <pub-id pub-id-type="pmcid">PMC7435626</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nemati</surname>
              <given-names>Shamim</given-names>
            </name>
            <name name-style="western">
              <surname>Holder</surname>
              <given-names>Andre</given-names>
            </name>
            <name name-style="western">
              <surname>Razmi</surname>
              <given-names>Fereshteh</given-names>
            </name>
            <name name-style="western">
              <surname>Stanley</surname>
              <given-names>Matthew D</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>Gari D</given-names>
            </name>
            <name name-style="western">
              <surname>Buchman</surname>
              <given-names>Timothy G</given-names>
            </name>
          </person-group>
          <article-title>An interpretable machine learning model for accurate prediction of sepsis in the ICU</article-title>
          <source>Crit Care Med</source>
          <year>2018</year>
          <month>04</month>
          <day>14</day>
          <volume>46</volume>
          <issue>4</issue>
          <fpage>547</fpage>
          <lpage>553</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29286945"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/CCM.0000000000002936</pub-id>
          <pub-id pub-id-type="medline">29286945</pub-id>
          <pub-id pub-id-type="pii">S1198-743X(20)30148-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5851825</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Association of meteorological factors and atmospheric particulate matter with the incidence of pneumonia: an ecological study</article-title>
          <source>Clin Microbiol Infect</source>
          <year>2020</year>
          <month>12</month>
          <volume>26</volume>
          <issue>12</issue>
          <fpage>1676</fpage>
          <lpage>1683</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cmi.2020.03.006</pub-id>
          <pub-id pub-id-type="medline">32184173</pub-id>
          <pub-id pub-id-type="pii">S1198-743X(20)30148-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Wan-Chi</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Chao-Shun</given-names>
            </name>
            <name name-style="western">
              <surname>Yeh</surname>
              <given-names>Chun-Chieh</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Hsin-Yun</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Yuarn-Jang</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>Chi-Li</given-names>
            </name>
            <name name-style="western">
              <surname>Cherng</surname>
              <given-names>Yih-Giun</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Ta-Liang</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>Chien-Chang</given-names>
            </name>
          </person-group>
          <article-title>Effect of influenza vaccination against postoperative pneumonia and mortality for geriatric patients receiving major surgery: a nationwide matched study</article-title>
          <source>J Infect Dis</source>
          <year>2018</year>
          <month>02</month>
          <day>14</day>
          <volume>217</volume>
          <issue>5</issue>
          <fpage>816</fpage>
          <lpage>826</lpage>
          <pub-id pub-id-type="doi">10.1093/infdis/jix616</pub-id>
          <pub-id pub-id-type="medline">29216345</pub-id>
          <pub-id pub-id-type="pii">4690590</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mirończuk</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Information extraction system for transforming unstructured text data in fire reports into structured forms: a Polish case study</article-title>
          <source>Fire Technol</source>
          <year>2019</year>
          <month>7</month>
          <day>26</day>
          <volume>56</volume>
          <issue>2</issue>
          <fpage>545</fpage>
          <lpage>581</lpage>
          <pub-id pub-id-type="doi">10.1007/s10694-019-00891-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Matthew C</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>Robyn L</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Lingyao</given-names>
            </name>
            <name name-style="western">
              <surname>Moradzadeh</surname>
              <given-names>Nathaniel</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>Brian E</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>David B</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>Curtis P</given-names>
            </name>
            <name name-style="western">
              <surname>Amrhein</surname>
              <given-names>Timothy J</given-names>
            </name>
            <name name-style="western">
              <surname>Lungren</surname>
              <given-names>Matthew P</given-names>
            </name>
          </person-group>
          <article-title>Deep learning to classify radiology free-text reports</article-title>
          <source>Radiology</source>
          <year>2018</year>
          <month>03</month>
          <volume>286</volume>
          <issue>3</issue>
          <fpage>845</fpage>
          <lpage>852</lpage>
          <pub-id pub-id-type="doi">10.1148/radiol.2017171115</pub-id>
          <pub-id pub-id-type="medline">29135365</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obermeyer</surname>
              <given-names>Ziad</given-names>
            </name>
            <name name-style="western">
              <surname>Emanuel</surname>
              <given-names>Ezekiel J</given-names>
            </name>
          </person-group>
          <article-title>Predicting the future - big data, machine learning, and clinical medicine</article-title>
          <source>N Engl J Med</source>
          <year>2016</year>
          <month>09</month>
          <day>29</day>
          <volume>375</volume>
          <issue>13</issue>
          <fpage>1216</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27682033"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMp1606181</pub-id>
          <pub-id pub-id-type="medline">27682033</pub-id>
          <pub-id pub-id-type="pmcid">PMC5070532</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhalishahi</surname>
              <given-names>Seyedmostafa</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>Riccardo</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>Joel T</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelli</surname>
              <given-names>Alberto</given-names>
            </name>
            <name name-style="western">
              <surname>Rinaldi</surname>
              <given-names>Fabio</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>Venet</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>04</month>
          <day>27</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12239</pub-id>
          <pub-id pub-id-type="medline">31066697</pub-id>
          <pub-id pub-id-type="pii">v7i2e12239</pub-id>
          <pub-id pub-id-type="pmcid">PMC6528438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Feature engineering vs. deep learning for paper section identification: toward applications in Chinese medical literature</article-title>
          <source>Information Processing &#38; Management</source>
          <year>2020</year>
          <month>05</month>
          <volume>57</volume>
          <issue>3</issue>
          <fpage>102206</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2020.102206</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>Charlene Jennifer</given-names>
            </name>
            <name name-style="western">
              <surname>Orfanoudaki</surname>
              <given-names>Agni</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Rebecca</given-names>
            </name>
            <name name-style="western">
              <surname>Caprasse</surname>
              <given-names>Francois Pierre M</given-names>
            </name>
            <name name-style="western">
              <surname>Hutch</surname>
              <given-names>Meghan</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Liang</given-names>
            </name>
            <name name-style="western">
              <surname>Fard</surname>
              <given-names>Darian</given-names>
            </name>
            <name name-style="western">
              <surname>Balogun</surname>
              <given-names>Oluwafemi</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>Matthew I</given-names>
            </name>
            <name name-style="western">
              <surname>Minnig</surname>
              <given-names>Margaret</given-names>
            </name>
            <name name-style="western">
              <surname>Saglam</surname>
              <given-names>Hanife</given-names>
            </name>
            <name name-style="western">
              <surname>Prescott</surname>
              <given-names>Brenton</given-names>
            </name>
            <name name-style="western">
              <surname>Greer</surname>
              <given-names>David M</given-names>
            </name>
            <name name-style="western">
              <surname>Smirnakis</surname>
              <given-names>Stelios</given-names>
            </name>
            <name name-style="western">
              <surname>Bertsimas</surname>
              <given-names>Dimitris</given-names>
            </name>
          </person-group>
          <article-title>Machine learning and natural language processing methods to identify ischemic stroke, acuity and location from radiology reports</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>e0234908</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0234908"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0234908</pub-id>
          <pub-id pub-id-type="medline">32559211</pub-id>
          <pub-id pub-id-type="pii">PONE-D-19-31481</pub-id>
          <pub-id pub-id-type="pmcid">PMC7304623</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>Soo-Yong</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>Yongman</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>Yongdon</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Hyo Joung</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Jihyun</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Woo-Sung</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Jae Ho</given-names>
            </name>
          </person-group>
          <article-title>Lessons learned from development of de-identification system for biomedical research in a Korean tertiary hospital</article-title>
          <source>Healthc Inform Res</source>
          <year>2013</year>
          <month>06</month>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>102</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.e-hir.org/DOIx.php?id=10.4258/hir.2013.19.2.102"/>
          </comment>
          <pub-id pub-id-type="doi">10.4258/hir.2013.19.2.102</pub-id>
          <pub-id pub-id-type="medline">23882415</pub-id>
          <pub-id pub-id-type="pmcid">PMC3717433</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>Soo-Yong</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Yu Rang</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>Yongdon</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Hyo Joung</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Jihyun</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>Yongman</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Moo-Song</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Chang-Min</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Woo-Sung</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Jae Ho</given-names>
            </name>
          </person-group>
          <article-title>A de-identification method for bilingual clinical texts of various note types</article-title>
          <source>J Korean Med Sci</source>
          <year>2015</year>
          <month>01</month>
          <volume>30</volume>
          <issue>1</issue>
          <fpage>7</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jkms.org/DOIx.php?id=10.3346/jkms.2015.30.1.7"/>
          </comment>
          <pub-id pub-id-type="doi">10.3346/jkms.2015.30.1.7</pub-id>
          <pub-id pub-id-type="medline">25552878</pub-id>
          <pub-id pub-id-type="pmcid">PMC4278030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lample</surname>
              <given-names>Guillaume</given-names>
            </name>
            <name name-style="western">
              <surname>Conneau</surname>
              <given-names>Alexis</given-names>
            </name>
            <name name-style="western">
              <surname>Denoyer</surname>
              <given-names>Ludovic</given-names>
            </name>
            <name name-style="western">
              <surname>Ranzato</surname>
              <given-names>Marc'Aurelio</given-names>
            </name>
          </person-group>
          <article-title>Unsupervised machine translation using monolingual corpora only</article-title>
          <year>2018</year>
          <conf-name>Sixth International Conference on Learning Representations</conf-name>
          <conf-date>Apr 30-May 3 2018</conf-date>
          <conf-loc>Vancouver</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1711.00043"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bahdanau</surname>
              <given-names>Dzmitry</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>Kyunghyun</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Yoshua</given-names>
            </name>
          </person-group>
          <article-title>Neural machine translation by jointly learning to align and translate</article-title>
          <year>2015</year>
          <conf-name>3rd International Conference on Learning Representations</conf-name>
          <conf-date>2015 May 7-9</conf-date>
          <conf-loc>San Diego</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Seo</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Yun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Vogel-Claussen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schiebler</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Gefter</surname>
              <given-names>WB</given-names>
            </name>
            <name name-style="western">
              <surname>van Beek</surname>
              <given-names>EJR</given-names>
            </name>
            <name name-style="western">
              <surname>Goo</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Hatabu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Deep learning applications in chest radiography and computed tomography: current state of the art</article-title>
          <source>J Thorac Imaging</source>
          <year>2019</year>
          <month>03</month>
          <volume>34</volume>
          <issue>2</issue>
          <fpage>75</fpage>
          <lpage>85</lpage>
          <pub-id pub-id-type="doi">10.1097/RTI.0000000000000387</pub-id>
          <pub-id pub-id-type="medline">30802231</pub-id>
          <pub-id pub-id-type="pii">00005382-201903000-00002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>The regression analysis of binary sequences</article-title>
          <source>Journal of the Royal Statistical Society: Series B (Methodological)</source>
          <year>1958</year>
          <volume>20</volume>
          <issue>2</issue>
          <fpage>215</fpage>
          <lpage>232</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1958.tb00292.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>Corinna</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>Vladimir</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Machine learning</source>
          <year>1995</year>
          <fpage>273</fpage>
          <lpage>297</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sebastiani</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Machine learning in automated text categorization</article-title>
          <source>ACM Comput. Surv</source>
          <year>2002</year>
          <month>03</month>
          <volume>34</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="doi">10.1145/505282.505283</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soucy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mineau</surname>
              <given-names>GW</given-names>
            </name>
          </person-group>
          <article-title>A simple KNN algorithm for text categorization</article-title>
          <year>2001</year>
          <conf-name>IEEE International Conference on Data Mining</conf-name>
          <conf-date>Nov 29-Dec 2 2001</conf-date>
          <conf-loc>San Jose</conf-loc>
          <publisher-name>IEEE</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Quinlan</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Induction of decision trees</article-title>
          <source>Mach Learn</source>
          <year>1986</year>
          <month>3</month>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>81</fpage>
          <lpage>106</lpage>
          <pub-id pub-id-type="doi">10.1007/bf00116251</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>Leo</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Machine learning</source>
          <year>2001</year>
          <month>1</month>
          <fpage>5</fpage>
          <lpage>32</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>Tomas</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Kai</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>Greg</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>Jeffrey</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <year>2013</year>
          <conf-name>2013 International Conference on Learning Representations</conf-name>
          <conf-date>May 2-May 4 2013</conf-date>
          <conf-loc>Scottsdale, AZ</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Shea</surname>
              <given-names>Keiron</given-names>
            </name>
            <name name-style="western">
              <surname>Nash</surname>
              <given-names>Ryan</given-names>
            </name>
          </person-group>
          <source>An introduction to convolutional neural networks</source>
          <year>2015</year>
          <month>12</month>
          <day>2</day>
          <access-date>2021-04-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1511.08458">https://arxiv.org/abs/1511.08458</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greff</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Koutnik</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Steunebrink</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>LSTM: a search space odyssey</article-title>
          <source>IEEE Trans Neural Netw Learn Syst</source>
          <year>2017</year>
          <month>10</month>
          <volume>28</volume>
          <issue>10</issue>
          <fpage>2222</fpage>
          <lpage>2232</lpage>
          <pub-id pub-id-type="doi">10.1109/TNNLS.2016.2582924</pub-id>
          <pub-id pub-id-type="medline">27411231</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ranzani</surname>
              <given-names>OT</given-names>
            </name>
            <name name-style="western">
              <surname>Prina</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Menéndez</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ceccato</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cilloniz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Méndez</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gabarrus</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barbeta</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bassi</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Torres</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>New sepsis definition (sepsis-3) and community-acquired pneumonia mortality. A validation and clinical decision-making study</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2017</year>
          <month>11</month>
          <day>15</day>
          <volume>196</volume>
          <issue>10</issue>
          <fpage>1287</fpage>
          <lpage>1297</lpage>
          <pub-id pub-id-type="doi">10.1164/rccm.201611-2262OC</pub-id>
          <pub-id pub-id-type="medline">28613918</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Yanshan</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>Sunghwan</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Sijia</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Feichen</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Liwei</given-names>
            </name>
            <name name-style="western">
              <surname>Atkinson</surname>
              <given-names>Elizabeth J</given-names>
            </name>
            <name name-style="western">
              <surname>Amin</surname>
              <given-names>Shreyasee</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Hongfang</given-names>
            </name>
          </person-group>
          <article-title>A clinical text classification paradigm using weak supervision and deep representation</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>01</month>
          <day>07</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-018-0723-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-018-0723-6</pub-id>
          <pub-id pub-id-type="medline">30616584</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-018-0723-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6322223</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Alelaiwi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>SMM</given-names>
            </name>
            <name name-style="western">
              <surname>Shamim Hossain</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Multiple disease risk assessment with uniform model based on medical clinical notes</article-title>
          <source>IEEE Access</source>
          <year>2016</year>
          <volume>4</volume>
          <fpage>7074</fpage>
          <lpage>7083</lpage>
          <pub-id pub-id-type="doi">10.1109/ACCESS.2016.2614541</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
