<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i11e29120</article-id>
      <article-id pub-id-type="pmid">34723829</article-id>
      <article-id pub-id-type="doi">10.2196/29120</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Stroke Outcome Measurements From Electronic Medical Records: Cross-sectional Study on the Effectiveness of Neural and Nonneural Classifiers</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Fernandes</surname>
            <given-names>Marta</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Diao</surname>
            <given-names>Xiaolin</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zanotto</surname>
            <given-names>Bruna Stella</given-names>
          </name>
          <degrees>MSc, PharmD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4012-1395</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Beck da Silva Etges</surname>
            <given-names>Ana Paula</given-names>
          </name>
          <degrees>Eng, MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6411-3480</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>dal Bosco</surname>
            <given-names>Avner</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7214-1767</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Cortes</surname>
            <given-names>Eduardo Gabriel</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0272-1982</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ruschel</surname>
            <given-names>Renata</given-names>
          </name>
          <degrees>PT</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7356-119X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>De Souza</surname>
            <given-names>Ana Claudia</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8722-9988</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Andrade</surname>
            <given-names>Claudio M V</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7366-2633</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Viegas</surname>
            <given-names>Felipe</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8121-8607</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Canuto</surname>
            <given-names>Sergio</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2973-4158</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Luiz</surname>
            <given-names>Washington</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1988-8412</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ouriques Martins</surname>
            <given-names>Sheila</given-names>
          </name>
          <degrees>MSc, MD, PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8452-712X</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Vieira</surname>
            <given-names>Renata</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2449-5477</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Polanczyk</surname>
            <given-names>Carisi</given-names>
          </name>
          <degrees>MSc, MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2447-2577</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>André Gonçalves</surname>
            <given-names>Marcos</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <address>
            <institution>Computer Science Department</institution>
            <institution>Universidade Federal de Minas Gerais</institution>
            <addr-line>Avenue Antônio Carlos, 6627</addr-line>
            <addr-line>Belo Horizonte, 31270-901</addr-line>
            <country>Brazil</country>
            <phone>55 3134095860</phone>
            <email>mgoncalv@dcc.ufmg.br</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2075-3363</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>National Institute of Health Technology Assessment - INCT/IATS (CNPQ 465518/2014-1)</institution>
        <institution>Universidade Federal do Rio Grande do Sul</institution>
        <addr-line>Porto Alegre</addr-line>
        <country>Brazil</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Graduate Program in Epidemiology</institution>
        <institution>Universidade Federal do Rio Grande do Sul</institution>
        <addr-line>Porto Alegre</addr-line>
        <country>Brazil</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Technology</institution>
        <institution>Pontifícia Universidade Católica do Rio Grande do Sul</institution>
        <addr-line>Porto Alegre</addr-line>
        <country>Brazil</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Graduate Program of Computer Science</institution>
        <institution>Universidade Federal do Rio Grande do Sul</institution>
        <addr-line>Porto Alegre</addr-line>
        <country>Brazil</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Brazilian Stroke Network</institution>
        <institution>Hospital Moinhos de Vento</institution>
        <addr-line>Porto Alegre</addr-line>
        <country>Brazil</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Computer Science Department</institution>
        <institution>Universidade Federal de Minas Gerais</institution>
        <addr-line>Belo Horizonte</addr-line>
        <country>Brazil</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Centro Interdisciplinar de História, Culturas e Sociedades (CIDEHUS)</institution>
        <institution>Universidade de Évora</institution>
        <addr-line>Évora</addr-line>
        <country>Portugal</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Marcos André Gonçalves <email>mgoncalv@dcc.ufmg.br</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>1</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>11</issue>
      <elocation-id>e29120</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>30</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>27</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>8</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Bruna Stella Zanotto, Ana Paula Beck da Silva Etges, Avner dal Bosco, Eduardo Gabriel Cortes, Renata Ruschel, Ana Claudia De Souza, Claudio M V Andrade, Felipe Viegas, Sergio Canuto, Washington Luiz, Sheila Ouriques Martins, Renata Vieira, Carisi Polanczyk, Marcos André Gonçalves. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 01.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/11/e29120" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>With the rapid adoption of electronic medical records (EMRs), there is an ever-increasing opportunity to collect data and extract knowledge from EMRs to support patient-centered stroke management.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to compare the effectiveness of state-of-the-art automatic text classification methods in classifying data to support the prediction of clinical patient outcomes and the extraction of patient characteristics from EMRs.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Our study addressed the computational problems of information extraction and automatic text classification. We identified essential tasks to be considered in an ischemic stroke value-based program. The 30 selected tasks were classified (manually labeled by specialists) according to the following value agenda: tier 1 (achieved health care status), tier 2 (recovery process), care related (clinical management and risk scores), and baseline characteristics. The analyzed data set was retrospectively extracted from the EMRs of patients with stroke from a private Brazilian hospital between 2018 and 2019. A total of 44,206 sentences from free-text medical records in Portuguese were used to train and develop 10 supervised computational machine learning methods, including state-of-the-art neural and nonneural methods, along with ontological rules. As an experimental protocol, we used a 5-fold cross-validation procedure repeated 6 times, along with <italic>subject-wise sampling</italic>. A heatmap was used to display comparative result analyses according to the best algorithmic effectiveness (F1 score), supported by statistical significance tests. A feature importance analysis was conducted to provide insights into the results.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The top-performing models were support vector machines trained with lexical and semantic textual features, showing the importance of dealing with noise in EMR textual representations. The support vector machine models produced statistically superior results in 71% (17/24) of tasks, with an F1 score &#62;80% regarding care-related tasks (patient treatment location, fall risk, thrombolytic therapy, and pressure ulcer risk), the process of recovery (ability to feed orally or ambulate and communicate), health care status achieved (mortality), and baseline characteristics (diabetes, obesity, dyslipidemia, and smoking status). Neural methods were largely outperformed by more traditional nonneural methods, given the characteristics of the data set. Ontological rules were also effective in tasks such as baseline characteristics (alcoholism, atrial fibrillation, and coronary artery disease) and the Rankin scale. The complementarity in effectiveness among models suggests that a combination of models could enhance the results and cover more tasks in the future.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Advances in information technology capacity are essential for scalability and agility in measuring health status outcomes. This study allowed us to measure effectiveness and identify opportunities for automating the classification of outcomes of specific tasks related to clinical conditions of stroke victims, and thus ultimately assess the possibility of proactively using these machine learning techniques in real-world situations.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>stroke</kwd>
        <kwd>outcomes</kwd>
        <kwd>electronic medical records</kwd>
        <kwd>EHR</kwd>
        <kwd>electronic health records</kwd>
        <kwd>text processing</kwd>
        <kwd>data mining</kwd>
        <kwd>text classification</kwd>
        <kwd>patient outcomes</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Stroke is the second leading cause of mortality and disability-adjusted life years globally [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. The outcomes of stroke can vary greatly, and timely assessment is essential for optimal management. As such, there has been an increasing interest in the use of automated machine learning (ML) techniques to track stroke outcomes, with the hope that such methods could make use of large, routinely collected data sets and deliver accurate, personalized prognoses [<xref ref-type="bibr" rid="ref3">3</xref>]. However, studies applying ML methods to stroke, although published regularly, have focused mostly on stroke imaging applications [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>] and structured data retrieval [<xref ref-type="bibr" rid="ref3">3</xref>]. Few studies have addressed the unstructured textual portion of electronic medical records (EMRs) as the primary source of information.</p>
        <p>Indeed, the use of EMR data in the last decade has led to promising findings in population health research, such as patient-use stratification [<xref ref-type="bibr" rid="ref7">7</xref>], treatment-effectiveness evaluation [<xref ref-type="bibr" rid="ref8">8</xref>], early detection of diseases [<xref ref-type="bibr" rid="ref9">9</xref>], and predictive modeling [<xref ref-type="bibr" rid="ref10">10</xref>]. However, dealing with EMR data is often labor intensive [<xref ref-type="bibr" rid="ref11">11</xref>] and challenging because of the lack of standardization in data entry, changes in coding procedures over time, and the impact of missing information [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. The information technology (IT) gap between automated data collection from EMRs and improving the quality of care has been described in the literature as a decelerator of value initiatives [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref18">18</xref>].</p>
        <p>With recent advances in IT, several groups have attempted to apply natural language processing (NLP) to the text analysis of EMRs to achieve early diagnosis of multiple conditions, such as peripheral arterial disease [<xref ref-type="bibr" rid="ref19">19</xref>], asthma [<xref ref-type="bibr" rid="ref20">20</xref>], multiple sclerosis [<xref ref-type="bibr" rid="ref21">21</xref>], and heart failure [<xref ref-type="bibr" rid="ref22">22</xref>]. In these studies, NLP was used to find specific words or phrases in a predefined dictionary that described the symptoms or signs of each disease [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>Generating value for the patient as the central guide requires advances in strategies to automate the capturing of data that will allow managers to assess the quality of service delivery to patients [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Accordingly, our research aims to compare the effectiveness of state-of-the-art automatic text classification methods in classifying data to support the prediction of clinical patient outcomes and the extraction of patient characteristics from EMR sentences. With stroke as our case study application, our specific goal is to investigate the capability of these methods to automatically identify, with reasonable effectiveness, the outcomes and clinical characteristics of patients from EMRs that may be considered in a stroke outcome measurement program.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>This study addressed a computational problem related to information extraction and free-text classification. In <xref rid="figure1" ref-type="fig">Figure 1</xref>, the dotted lines represent the combination of the text representation technique that was used with each classifier in the two-phase experiments. Our study was generally organized into four stages: (1) task selection; (2) study design, preprocessing, and data annotation; (3) definition of automatic text classification methods; and (4) experimental evaluation (experimental protocol, setup, and analysis of results).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study architecture. BERT: bidirectional encoder representations from transformers; CNN: convolutional neural network; EHR: electronic health record; KNN: K-nearest neighbor; SVM: support vector machine; TF-IDF: term frequency-inverse document frequency.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e29120_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Task Selection</title>
        <p>A literature review and multidisciplinary expert interviews (n=8) were used to define specific outcome dimensions and measures that may be considered in an outcome measurement program for ischemic stroke. The outcome identification step was based on adhering to value agenda element dimensions to cover the tiers of the outcome hierarchy [<xref ref-type="bibr" rid="ref26">26</xref>], such as functionality dimensions, the recovery process, and outcomes that matter to patients. These dimensions included risk events, achieved health care status, and stroke outcome scales, such as the National Institutes of Health Stroke Scale (NIHSS) and the modified Rankin scale (mRS) [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
      <sec>
        <title>Study Design and Data Annotation</title>
        <p>We retrospectively built a database of medical records from a digital hospital system. The database covered 2 years of patients hospitalized for ischemic stroke. The hospital is a private institution of excellence in southern Brazil. The EMR system used was the MV Soul (Recife). Since 2017, the hospital has introduced the ICHOM standard sets’ data collection routine for different clinical pathways and created an office for institutional values. To examine the stroke pathway, data were collected on October 15, 2015. In 2019, the hospital incorporated the Angel Awards Program [<xref ref-type="bibr" rid="ref29">29</xref>], which was certified as a platinum category at the end of the first year. This study was approved by the hospital ethics committee (CAAE: 29694720000005330).</p>
        <p>Medical records of patients were submitted to preprocessing using the spaCy Python library (Python Software Foundation; Python Language Reference, version 2.7) [<xref ref-type="bibr" rid="ref30">30</xref>] to stratify texts into sentences. A total of 44,206 EMR sentences were obtained from 188 patients. The approach followed a hypothesis for managing unbalanced data, such as electronic health records, which assumes that relevant information to be retrieved from EMRs encompasses a small space of words delimited as sentences, and the residual is noise [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]. During the text stratification process, spaCy [<xref ref-type="bibr" rid="ref30">30</xref>] uses rule-based algorithms that set the sentence limits according to the patterns of characters, thereby delimiting its beginning and end. The names of patients and medical staff were identified and removed, thus eliminating all confidential information from the data set. Each preprocessed sentence was represented as a vector of words that disregarded grammar and word order but maintained word multiplicity.</p>
        <p>For sentence annotation (intratask class labeling), we developed annotation guidelines that provided an explicit definition of each task, its classes (response options), and examples to be identified in the documents. These guidelines are written in Portuguese and are available upon request.</p>
        <p>Two annotators independently reviewed the preprocessed text documents (44,206 sentences); the agreement between them, measured by κ, was higher than 0.61 (substantial agreement) [<xref ref-type="bibr" rid="ref34">34</xref>]. Task-level disagreements were resolved by consensus between the 2 annotators, with assistance from a committee composed of experts (APE, ACS, MP, KBR, and CAP).</p>
        <p>Each task could have two or more output answers, depending on the meaning of the sentence. Examples of an EMR and the annotation process can be seen in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref> and <xref ref-type="supplementary-material" rid="app2">2</xref>. Task details in terms of class and sentence distribution are shown in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> and demonstrate the highly imbalanced nature of the tasks with most of the sentences belonging to the NI (noninformative) class. This makes it a very hard endeavor from an ML perspective. Subsequently, we evaluated the impact of this imbalance in the experimental results.</p>
      </sec>
      <sec>
        <title>Automatic Text Classification Methods</title>
        <p>As presented in the study design, the ML methods were divided into two categories: two-phase methods and end-to-end (E2E) methods [<xref ref-type="bibr" rid="ref35">35</xref>]. The first category of methods consisted of approaches whose document (ie, sentence) representation was intrinsically independent of the classification algorithm used to predict the class. In other words, the classifier used to predict the class of documents was not used in the construction phase of the document representation. In terms of text representations, we considered three alternatives, namely traditional term-weighting alternatives (term frequency-inverse document frequency [TFIDF]); weighting based on word and character (n-gram) frequency; and recent representations based on meta-features, which capture statistical information from a document’s neighborhood and have obtained state-of-the-art effectiveness in recent benchmarks [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref39">39</xref>].</p>
        <p>As two-phase classification algorithms, we exploited support vector machines (SVMs), which are still considered the most robust nonneural network text classification algorithm [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], random forests (RF), K-nearest neighbor (KNN), and naïve Bayes classifier (NBC), to address the most popular algorithms in terms of classification and retrieval of text information [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        <p>In contrast, E2E methods use a discriminative classifier function to transform the document representation space into a new and more informed (usually more reduced and compact) space and use this classifier to predict the document class. In general, these approaches use an iterative process of representation, classification, evaluation, and parameter adaptation (eg, transform, predict, evaluate loss function, and backpropagate, respectively). For E2E classifiers, we exploited two neural architectures, namely convolutional neural networks (CNNs), which exploit textual patterns such as word co-occurrences, and bidirectional encoder representations from transformers (BERT), which exploits attention mechanisms and constitutes the current state of the art in many NLP tasks.</p>
        <p>Finally, we exploited a rule-based classifier specialized for the tasks at hand (stroke tasks, represented in the ontology web language [OWL]). The rule-based knowledge model was developed using logical conditions built alongside domain specialists [<xref ref-type="bibr" rid="ref45">45</xref>]. This technique has shown effectiveness equivalent to that of some ML classification models in certain domains without the need for a large amount of data and training time, which are commonly required by supervised methods [<xref ref-type="bibr" rid="ref46">46</xref>-<xref ref-type="bibr" rid="ref49">49</xref>]. In contrast, it is heavily dependent on the specialists and the coverage of the rules on the text expressions. More details about each of the exploited algorithms are provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref63">63</xref>].</p>
        <p>The two-phase methods used in this research are referred to as the representation technique combined with the classification algorithm, as follows: word-TFIDF and character-TFIDF combined with SVM (SVM+W+C), Bag-of-Words (BoW) combined with SVM (SVM+BoW), meta-features combined with SVM (meta-features), word-TFIDF combined with SVM (SVM+Word-TFIDF), character-TFIDF combined with SVM (SVM+Char-TFIDF), word-TFIDF combined with random forest (RF+Word-TFIDF), word-TFIDF combined with KNN (KNN+Word-TFIDF), and word-TFIDF combined with naïve Bayes (Naïve Bayes+Word-TFIDF). In contrast to TFIDF, BoW explores only the frequency of terms (term frequency) and not the frequency of terms in the collection (IDF component). The E2E methods are simply called CNN and BERT, and the ontological method is called OWL.</p>
      </sec>
      <sec>
        <title>Experimental Evaluation</title>
        <sec>
          <title>Overview</title>
          <p>The experimental process consisted of testing different classification methods with sets of annotated data to assess and compare their performances (effectiveness). The experimental procedure, described in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>, consisted of four phases: (1) representing the free-text sentences as numerical vectors, (2) the training and tuning process (in a validation set) by means of a folded cross-validation procedure, (3) the execution of the classification algorithms in the test set and effectiveness assessment, and (4) the synthesis of the results in a heatmap table.</p>
          <p>A classification model was developed for each task. Each task resulted in an individual automatic classification model for the training and testing process of the model. As an experimental protocol, we used a five-fold cross-validation procedure repeated six times (resulting in 30 test samples). We also exploited <italic>subject-wise cross-validation</italic> in the sense that the information from the same patient was always assigned to the same fold to test the ability of the model to predict new data that was not used in the learning process. These procedures address potential problems, such as overfitting and selection bias [<xref ref-type="bibr" rid="ref64">64</xref>], and produce results that are more reliable.</p>
          <p>To evaluate the ability to classify the relevant Brazilian-Portuguese medical free-text records correctly, we used the Macro-F1 score (equation 1). This metric is based on a <italic>confusion matrix</italic> and is defined as follows:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i11e29120_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where TP is true positive, TN is true negative, FP is false positive, and FN is false negative. Precision (positive predictive value) = TP / (TP + FP) is the fraction of returned hits that were true positives. Recall (sensitivity) = TP / (TP + FN) is the fraction of all actual positive instances that were retrieved.</p>
          <p>The F1 measure is calculated for each class. Macro-F1 summarizes the classification effectiveness by averaging F1 values for all classes. Macro-F1 is one of the most popular aggregated evaluation metrics for the classifier evaluation of unbalanced or skewed data sets [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref66">66</xref>]. Macro-F1 is especially suitable for imbalanced data sets, as the effectiveness of each individual class contributes equally to producing a final score. For instance, in a task with four classes, in which one of them is NI, if all instances are predicted as NI, the Macro-F1 score will be no higher than 0.25 (F1 of at most 1 for NI and 0 for the three other classes). Accuracy or any other evaluation measure focused on the instance, instead of the class effectiveness, would produce a very high score (close to 1 in this particular case).</p>
          <p>To compare the average results of our cross-validation experiments, we assessed statistical significance by using a paired two-tailed <italic>t</italic> test with 95% CIs. To account for multiple tests, we adopted the Friedman-Nemenyi test [<xref ref-type="bibr" rid="ref67">67</xref>] with Bonferroni correction for multiple comparisons of mean rank sums. The Friedman test was used to compare multiple methods.</p>
          <p>We consider that making the data and the code used in our experimental protocol available to others is potentially useful for reproducibility and for use in other studies. Both the code and data will be available upon request. The model-specific parameter tuning details are presented in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>.</p>
        </sec>
        <sec>
          <title>Experimental Analysis</title>
          <p>The experiments aimed to provide relationships between the classification methods and the tasks, allowing for connecting the best methods with each outcome measure or patient characteristics. Considering that the model’s results can influence health decision-making in some way, the F1 score thresholds may vary depending on the type of class and the imbalance of the data. We reported the results by means of a heatmap, adopting a red color for F1&#60;20%, a gradual color scale from orange to yellow for 21%&#60;F1&#60;79%, and green for F1&#62;80% [<xref ref-type="bibr" rid="ref68">68</xref>-<xref ref-type="bibr" rid="ref71">71</xref>]. Tasks (represented by the lines) were ordered by the average of the performed models, whereas the ordering of the columns shows the rank position of each method according to the statistical analysis.</p>
          <p>For the sake of the fairness of the comparison, the OWL technique should not be and is not directly compared and ranked herein along with the other ML models described above that require a combination of text representations with trained classification algorithms. OWL rules were designed to work with the entire corpus (including the test) and were not designed for generalization. Instead, they are built to work well in the specific domain or task for which they were created. In any case, for reasons of practical application and as a research exercise, as a secondary analysis, we compared (later) the OWL technique with the ML model ranked as the best based on the Friedman test. This analysis allowed us to identify the weaknesses and strengths of both approaches (generalized ML models vs domain or task-specific ontological rules) in the contrasting tasks.</p>
          <p>Moreover, we performed a feature selection analysis [<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref73">73</xref>]. This technique is used to rank the most informative features of each task according to the information theory criteria. In particular, we used SelectKBest (Python Software Foundation; Python Language Reference, version 2.7) with the chi-square, which is independent of the classification algorithms used [<xref ref-type="bibr" rid="ref74">74</xref>]. This final analysis helps in understanding how ML can help with outcome measurements for the stroke care pathway, potentially boosting advances in quality indicator automation.</p>
          <p>Finally, to complete the analysis and evaluate the impact of the highly skewed distribution, especially toward the NI class, we ran an experiment in which we performed a random undersampling process for all considered tasks (we used the RandomUnderSampler Python library [<xref ref-type="bibr" rid="ref75">75</xref>]). In detail, we randomly selected the same number of training examples of the NI class as the number of instances of the second largest (non-NI) class of a given task. We then reran all ML classifiers (the ontology method is not affected by this process as it has no training) in all 24 tasks, considering as the training set the reduced (undersampled) NI training samples along with the same (unchanged) previous samples for the other classes. We did that for all six rounds of five-fold cross-validation of our experimental procedure, changing the seed for selection in each round, resulting in six different NI reduced training sets. The test folds in all cases remain unchanged, meaning that we keep the same skewed distribution as in the original data set, as we do not know the class of the test instances.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Tasks Selection</title>
        <p>Discussions with experts in the stroke care pathway allowed us to define 30 tasks that were considered feasible to extract from EMRs. For the first tier, the standard sets were usually defined to evaluate the clinical stroke outcomes that were used, including the mRS [<xref ref-type="bibr" rid="ref27">27</xref>] and the NIHSS scales [<xref ref-type="bibr" rid="ref76">76</xref>], in addition to traditional outcomes such as mortality and pain level. For tier 2, the ICHOM standard set developed for ischemic stroke was used [<xref ref-type="bibr" rid="ref77">77</xref>], which considers measures of mobility, ability to communicate, ability to feed orally, the ability to understand, and measures and scales of strength level. Indicators of the hospitalization care process used in the institution were also included, such as rating scales and risk events tracked by fall risk, pressure ulcer risk, fall events during hospitalization, infection indicators, intracranial hemorrhage, therapy care (thrombolytic, thrombectomy, or both), and the location of the patient during the inpatient path [<xref ref-type="bibr" rid="ref78">78</xref>]. Finally, baseline characteristics important for tracking the population and further risk-adjusted analysis were included [<xref ref-type="bibr" rid="ref79">79</xref>], such as high blood pressure, smoking status, coronary artery disease, atrial fibrillation, diabetes, prior stroke, active cancer, alcoholism, obesity, and dyslipidemia. Each category, containing the tasks and their respective classes, is presented in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Eligible tasks for analysis and classification rules.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="400"/>
            <col width="190"/>
            <col width="380"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Tasks</td>
                <td>Number of classes</td>
                <td>Supporting information for classes</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Health care status achieved (tier 1)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rankin</td>
                <td>8</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>0-6</p>
                    </list-item>
                    <list-item>
                      <p>NI<sup>a</sup></p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>National Institutes of Health Stroke Scale</td>
                <td>42</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>1-41</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Death</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Absence of vital signs</p>
                    </list-item>
                    <list-item>
                      <p>Vital signs present</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Process of recovery (tier 2)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mobility level</td>
                <td>16</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>1-15</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Self-care</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Able</p>
                    </list-item>
                    <list-item>
                      <p>Unable</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>No pain</p>
                    </list-item>
                    <list-item>
                      <p>Low to intermediate pain</p>
                    </list-item>
                    <list-item>
                      <p>Intense pain</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Strength</td>
                <td>7</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>0-5</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Paresis</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ability to feed orally</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ability to communicate</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>Poorly or symptomatic</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ability of understanding</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>Poorly or symptomatic</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ability to ambulate</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>Poorly or symptomatic</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Treatment or care related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Thrombolytic therapy</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>No delta</p>
                    </list-item>
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Thrombectomy</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>No delta</p>
                    </list-item>
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Location</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Emergency room</p>
                    </list-item>
                    <list-item>
                      <p>ICU<sup>b</sup></p>
                    </list-item>
                    <list-item>
                      <p>Inpatient unit</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Infection indication</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intracranial hemorrhage</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Fall risk</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Low risk</p>
                    </list-item>
                    <list-item>
                      <p>Moderate risk</p>
                    </list-item>
                    <list-item>
                      <p>High risk</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pressure ulcer risk</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Low risk</p>
                    </list-item>
                    <list-item>
                      <p>Moderate risk</p>
                    </list-item>
                    <list-item>
                      <p>High risk</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Fall event during inpatient</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Baseline characteristics</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>High blood pressure</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Smoking status</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>Former</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Coronary artery disease</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Atrial fibrillation</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Diabetes</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Prior stroke</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cancer</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Alcoholism</td>
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>Former</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Obesity</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Dyslipidemia</td>
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Yes</p>
                    </list-item>
                    <list-item>
                      <p>No</p>
                    </list-item>
                    <list-item>
                      <p>NI</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>NI: noninformative.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>ICU: intensive care unit.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>After the identification of all tasks and the annotation process, the analysis proceeded only with tasks that had substantial (0.61≤κ≤0.80) and almost perfect (κ≥0.81) agreement between annotators [<xref ref-type="bibr" rid="ref34">34</xref>]. A total of six tasks were excluded from the final analysis because of moderate or fair agreement or disagreement: (1) active cancer information, (2) strength level, (3) intracranial hemorrhage, (4) ability to understand, (5) self-care, and (6) fall events during inpatient visits. All documents were labeled by the annotators, and the median κ regarding the 24 remaining tasks was 0.74 (IQR 0.65-0.89; substantial agreement).</p>
      </sec>
      <sec>
        <title>Patient Characteristics</title>
        <p>The descriptive characteristics of patients, including previous comorbidities, NIHSS score, and clinical care, are presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Descriptive characteristics of the patients.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="400"/>
            <col width="0"/>
            <col width="350"/>
            <col width="0"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristics</td>
                <td colspan="3">Patients with ischemic stroke evaluated (n=188)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Values, median (range)</td>
                <td>Values, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Age (years)</td>
                <td colspan="2">79 (68-87)</td>
                <td>N/A<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="3">LOS<sup>b</sup> (days)</td>
                <td colspan="2">6 (4-12)</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Sex</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td colspan="2">N/A</td>
                <td colspan="2">100 (53)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td colspan="2">N/A</td>
                <td colspan="2">88 (47)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Comorbidities</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Previous stroke</td>
                <td colspan="2">N/A</td>
                <td colspan="2">38 (20)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Previous coronary artery disease</td>
                <td colspan="2">N/A</td>
                <td colspan="2">12 (6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Atrial fibrillation</td>
                <td colspan="2">N/A</td>
                <td colspan="2">33 (18)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Diabetes</td>
                <td colspan="2">N/A</td>
                <td colspan="2">53 (28)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hypertension</td>
                <td colspan="2">N/A</td>
                <td colspan="2">125 (66)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Smoking status</td>
                <td colspan="2">N/A</td>
                <td colspan="2">15 (8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Alcoholism</td>
                <td colspan="2">N/A</td>
                <td colspan="2">4 (2)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Treatment and care related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Antithrombotic therapy</td>
                <td colspan="2">N/A</td>
                <td colspan="2">131 (70)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Thrombolysis with rtPA<sup>c</sup></td>
                <td colspan="2">N/A</td>
                <td colspan="2">38 (20)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Thrombectomy</td>
                <td colspan="2">N/A</td>
                <td colspan="2">12 (6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Thrombolysis and thrombectomy</td>
                <td colspan="2">N/A</td>
                <td colspan="2">7 (4)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>NIHSS<sup>d</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;8</td>
                <td colspan="2">N/A</td>
                <td colspan="2">147 (78)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#62;8 and &#60;15</td>
                <td colspan="2">N/A</td>
                <td colspan="2">24 (13)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#62;15</td>
                <td colspan="2">N/A</td>
                <td colspan="2">17 (9)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>LOS: length of stay.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>rtPA: alteplase.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>NIHSS: National Institutes of Health Stroke Scale.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Experimental Results</title>
        <p>The Macro-F1 values for each of the 24 tasks using the 10 compared models are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. Considering each task separately, there is no single method that always dominates, and there is no agreement on a unique category of tasks that perform better. The ML models SVM+W+C and SVM+BoW were the best and most consistent techniques used in this data set. Both techniques use term-weighting representations that are used alongside SVM classifiers. The latter simply exploits within-document word term frequencies (term frequency), whereas the former, in addition to exploiting data set–oriented term statistics (IDF), also builds character-based n-gram representations of the words in the vocabulary. The character-based n-grams, despite increasing the vocabulary size and sparsity, help to deal with misspellings and word variations that are common in EMRs, which might explain the good results of SVM+W+C.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Results of Macro-F1 for each task and comparative models (expressed in percentage). BERT: bidirectional encoder representation from transformers; CNN: convolutional neural network; mRS: Modified Rankin Score; NIHSS: National Institutes of Health Stroke Scale; SVM+BoW: support vector machine plus Bag-of-Words; TFIDF: term frequency-inverted document frequency; W+C+SVM: word-term frequency-inverted document frequency and character-term frequency-inverted document frequency combined with support vector machine.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e29120_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The SVM+W+C model excels in tasks belonging to different categories, such as the ability to feed orally (tier 2: the process of recovery), with an F1 score of 89.5% (95% CI 89.2%-89.8%); death (tier 1: health care status achieved), with an F1 score of 89.5% (95% CI 87.5%-92.5%); and high blood pressure and dyslipidemia (the baseline characteristics of patients), with F1 scores of 86% (95% CI 83.8%-88.2%) and 83.2% (95% CI 77%-89%), respectively. SVM+BoW, in turn, excels in tasks belonging to the treatment- or care-related categories, such as patient location during treatment (F1 score 89.4%; 95% CI 88%-91%), fall risk (F1 score 91.1%; 95% CI 90.1%-92.1%), and pressure ulcer risk (F1 score 92.5%; 95% CI 91.5%-93.5%). The meta-features model, which also exploits SVM as a classifier but uses a completely different text representation, was, on average, the third-best placed ML model to cover more tasks with good effectiveness, except in tasks such as diabetes (F1 score 90.1%; 95% CI 88.8%-91.4%) and thrombolytic therapy (F1 score 88.6%; 95% CI 87.5%-90.1%), in which it was the sole winner model (best performer with no ties). The models that used SVM but exploited either only word- or character-based representations came in the fourth and fifth places, losing to methods that exploited both representations in a conjugated way.</p>
        <p>The neural methods CNN and BERT were grouped in the middle, with only moderate effectiveness in most tasks. This outcome is mostly due to the lack of sufficient training data for the optimal deployment of these methods. Indeed, previous work has demonstrated that neural solutions are not adequate for tasks with low to moderate training data, and they can only outperform other more traditional ML methods in text classification tasks when presented with massive amounts of training [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], which is generally uncommon in the health domain.</p>
        <p>Regarding the effectiveness of the tasks, patient characteristics and care-related process tasks produced better effectiveness. Five of them are examples of good adherence with multiple models, including patient treatment location, fall risk, thrombolytic therapy, diabetes, and paresis, all with multiple models with high effectiveness. Tasks related to measures of mobility, ability to communicate, ability to ambulate, and pain did not achieve high Macro-F1 values in most models.</p>
        <p>The tasks with many classes, such as NIHSS (42 classes), mobility level (n=16), and Rankin (n=8), performed worse, regardless of the model. This outcome is mostly due to issues related to the very skewed distribution (high imbalance) found in our unstructured real-life data set. Indeed, the high percentage of NI in the document penalizes effectiveness, mainly for the minor classes, which are captured more faithfully by the Macro-F1 score. However, properly dealing with such an imbalance is not a simple task, as discussed next. Finally, as the sentence length was very similar across tasks and classes, this factor did not affect the results, that is, we could not infer any significant relationship between the mean number of words per sentence and the Macro-F1 scores of the models.</p>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> provides information regarding the effectiveness of the OWL classifier. In general, the OWL effectiveness is similar to that of the best ML models, with 11 tasks having a Macro-F1 score higher than 80%. The most interesting issue is that most of the best-performing tasks by OWL <italic>do not coincide</italic> with the best ones produced by the ML models in <xref rid="figure2" ref-type="fig">Figure 2</xref>. For instance, the OWL classifier performed very well on the patient's baseline characteristics tasks, such as NIHSS and mRS scale, precisely the ones in which the ML models performed poorly. Overall, the OWL strategy was more robust in the tasks in which the ML models suffered from a scarcity of examples and high imbalance. On the contrary, OWL suffered on tasks that were much more open to interpretation and had more text representations than those for which they were built [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref80">80</xref>]. For instance, in the <italic>death</italic> task, despite good within-annotator agreement, we believe that due to a variety of clinical terms in the clinical text used to describe multiple clinical concepts, the rules initially created failed to reflect the understanding of a noninformative sentence versus a sentence that reports the vital signs of patients, which penalized the OWL model.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Effectiveness results for the ontology-based model. mRS: Modified Rankin Score; NIHSS: National Institutes of Health Stroke Scale.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e29120_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>A direct comparison between OWL and the best ML method is presented in <xref rid="figure4" ref-type="fig">Figures 4</xref> and <xref rid="figure5" ref-type="fig">5</xref>, in which <xref rid="figure4" ref-type="fig">Figure 4</xref> represents the tasks in which OWL performed better than the best ML model for the same tasks and <xref rid="figure5" ref-type="fig">Figure 5</xref> represents the tasks with higher F1 scores in the ML model against OWL. SVM+W+C has a considerable advantage over the other ML strategies, as the strategy of choice to be compared in the vast majority of cases. The best tasks performed by the best model in each case, either SVM+W+C or OWL, do not coincide. Indeed, there is a potential complementarity between ML and alternatives.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Best performed tasks in Ontology versus top-ranked model. mRS: Modified Rankin Score; NIHSS: National Institutes of Health Stroke Scale; SVM: support vector machine; W+C+SVM: word-term frequency-inverted document frequency and character-term frequency-inverted document frequency combined with support vector machine.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e29120_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Best performed tasks in the top-ranked model versus Ontology. SVM: support vector machine; W+C: word-term frequency-inverted document frequency and character-term frequency-inverted document frequency.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e29120_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Effect of Class Imbalance on the Results—Undersampling</title>
        <p>As we have discussed, all our tasks are extremely skewed, in the sense that the NI (noninformed; majority) class dominates over the other (minority) classes, where the useful information really lies. This imbalance occurs in a proportion that can achieve 1:1000 examples in the minority class to the majority class for some tasks.</p>
        <p>This imbalance may cause bias in the training data set, influencing some of the experimented ML algorithms toward giving priority to the NI class, ultimately undermining the classification of the minority classes on which predictions are most important. One approach to addressing the problem of class imbalance is to randomly resample the training data set. A simple, yet effective approach to deal with the problem is to randomly delete examples from the majority class, a technique known as random undersampling [<xref ref-type="bibr" rid="ref81">81</xref>].</p>
        <p>The results of this experiment are shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>, which compares the performance of the classifiers in scenarios with and without undersampling. For the sake of space, we only show the results for the best nonneural (W+C+SVM) and neural (BERT) classifiers, but the results are similar for all tested classifiers (<xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>).</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Results of Macro-F1 score in the undersampling sample, expressed by percentage. mRS: Modified Rankin Score; NIHSS: National Institutes of Health Stroke Scale; SVM: support vector machine; W+C: word-term frequency-inverted document frequency and character-term frequency-inverted document frequency.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e29120_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>As can be seen, the undersampling process caused major losses in both classifiers. Such losses occurred across all tasks, varying from 5% of Macro-F1 score reduction (death) to 58% (NIHSS) for W+C+SVM, and 11% (death) to 98% (NIHSS) of Macro-F1 effectiveness loss in BERT. The largest losses for the neural method were expected, as this type of classifier is more sensitive to the amount of training. However, to a certain degree, all the classifiers suffered major losses after the undersampling process. These results may be attributed to the larger difference in class distribution between training and testing and the inevitable loss of information that comes after the removal of training instances after undersampling.</p>
        <p>These phenomena can be better seen when we look at the individual values of F1, precision, and recall of the classes of the tasks. <xref ref-type="table" rid="table3">Table 3</xref> shows an example of the tasks of infection indication, thrombolytic therapy, and ability to communicate with the W+C+SVM classifier. As we can see, all classes have a reduced F1 in the undersampling scenario. This is mainly due to a large reduction in the precision of the classes. This happens because W+C+SVM misclassifies NI instances as belonging to some of the relevant classes. As the classifier is obliged to categorize a sentence in one of the existing classes, the lack of information about the fact that a sentence does not have useful information for assigning the sentence in one of the classes of interest confounds the classifier. In other words, the negative information about the NI (eg, frequent words in NI sentences that help to characterize this class but that are also shared by some non-NI instances, and whose frequency was altered by the undersampling) is in fact useful information for avoiding false positives, which may cause many problems in a real scenario, including false alarms, waste of resources, and distrust of the automatic methods.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of undersampling and original sampling in terms of precision, recall, and Macro-F1 score (W+C+SVM model).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="70"/>
            <col width="160"/>
            <col width="110"/>
            <col width="180"/>
            <col width="0"/>
            <col width="160"/>
            <col width="120"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Class</td>
                <td colspan="4">Undersampling</td>
                <td colspan="3">Original sampling</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F1 (%)<sup>a</sup></td>
                <td colspan="2">Precision</td>
                <td>Recall</td>
                <td>F1 (%)<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="9">
                  <bold>Infection indicative</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>−1</td>
                <td>1</td>
                <td>0.96</td>
                <td>98</td>
                <td colspan="2">0.99</td>
                <td>1</td>
                <td>99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>0.39</td>
                <td>0.89</td>
                <td>54</td>
                <td colspan="2">0.88</td>
                <td>0.75</td>
                <td>81</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>0.28</td>
                <td>0.82</td>
                <td>42</td>
                <td colspan="2">0.68</td>
                <td>0.53</td>
                <td>59</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Thrombolytic therapy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>−1</td>
                <td>1</td>
                <td>0.98</td>
                <td>99</td>
                <td colspan="2">1</td>
                <td>1</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>0.32</td>
                <td>0.62</td>
                <td>42</td>
                <td colspan="2">0.69</td>
                <td>0.52</td>
                <td>59</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>0.31</td>
                <td>0.91</td>
                <td>47</td>
                <td colspan="2">0.89</td>
                <td>0.91</td>
                <td>90</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Ability to communicate</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>−1</td>
                <td>1</td>
                <td>0.96</td>
                <td>98</td>
                <td colspan="2">0.99</td>
                <td>1</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>0.34</td>
                <td>0.63</td>
                <td>44</td>
                <td colspan="2">0.9</td>
                <td>0.26</td>
                <td>40</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>0.35</td>
                <td>0.81</td>
                <td>49</td>
                <td colspan="2">0.76</td>
                <td>0.64</td>
                <td>69</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2</td>
                <td>0.32</td>
                <td>0.93</td>
                <td>48</td>
                <td colspan="2">0.82</td>
                <td>0.8</td>
                <td>81</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Macro-F1 score (W+C+SVM model).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Feature Importance</title>
        <p>For the tasks presented in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> (alcoholism, atrial fibrillation, coronary artery disease, dyslipidemia, obesity, NIHSS, Rankin [mRS], infection indicators, high blood pressure, death, ability to feed orally, and ability to communicate), we present the top 10 clinical features (ie, words) used in the task prediction in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>, which means the 10 features with the highest contribution to task prediction. This analysis helps to better understand the divergence between approaches. It is worth noting that in the tasks in which the ML models performed better (second column), the top-ranked features were all related to the semantics of the task. For instance, considering the <italic>death</italic> task as an example, the ML model was able to identify important features for the task, which produced a higher information gain than the OWL model. Indeed, for <italic>death</italic>, only three features of the 10 most relevant explicitly use the word <italic>death</italic>, but most features are somewhat related to this outcome. This finding suggests data quality issues (vocabulary coverage) that may drastically influence the effectiveness of the OWL strategy, which exploits only rules that explicitly contain the word <italic>death</italic> (or related ones) but no other terms. However, for the features in the first column, in which the OWL models were better, there were still features with considerable contributions that were not directly related to the information sought. For example, for the NIHSS task, rule-based knowledge models built alongside clinical domain vocabulary specialists may be the best alternative.</p>
        <boxed-text id="box1" position="float">
          <title>Top 10 clinical indicators for task prediction models and feature importance. The English translation is given in parentheses; misspellings in the original writing are also indicated.</title>
          <p>
            <bold>Alcoholism</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>etilismo (alcoholism)</p>
            </list-item>
            <list-item>
              <p>etilista (alcoholic)</p>
            </list-item>
            <list-item>
              <p>fumo (smoke)</p>
            </list-item>
            <list-item>
              <p>históira (history with misspelling in the original)</p>
            </list-item>
            <list-item>
              <p>álcool (alcohol)</p>
            </list-item>
            <list-item>
              <p>cart</p>
            </list-item>
            <list-item>
              <p>osteoartrose (osteoarthritis)</p>
            </list-item>
            <list-item>
              <p>ttu (short for transurethral resection of the prostate)</p>
            </list-item>
            <list-item>
              <p>tabagismo (smoking)</p>
            </list-item>
            <list-item>
              <p>cesária (cesarean)</p>
            </list-item>
          </list>
          <p>
            <bold>Atrial fibrillation</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>fa (short for atrial fibrillation)</p>
            </list-item>
            <list-item>
              <p>comorbidades (comorbidities)</p>
            </list-item>
            <list-item>
              <p>acfa (short for atrial fibrillation)</p>
            </list-item>
            <list-item>
              <p>paroxística (paroxysmal)</p>
            </list-item>
            <list-item>
              <p>has (short for high blood pressure)</p>
            </list-item>
            <list-item>
              <p>anticoagulado (anticoagulated)</p>
            </list-item>
            <list-item>
              <p>depressão (depression)</p>
            </list-item>
            <list-item>
              <p>indeterminado (indeterminate)</p>
            </list-item>
            <list-item>
              <p>digoxina (digoxin)</p>
            </list-item>
            <list-item>
              <p>institucionalizada (institutionalized)</p>
            </list-item>
          </list>
          <p>
            <bold>Coronary artery disease</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>cardiopatia (heart disease)</p>
            </list-item>
            <list-item>
              <p>isquêmica (ischemic)</p>
            </list-item>
            <list-item>
              <p>actp (short for percutaneous transluminal coronary angioplasty)</p>
            </list-item>
            <list-item>
              <p>dp</p>
            </list-item>
            <list-item>
              <p>crm (short for myocardial revascularization surgery)</p>
            </list-item>
            <list-item>
              <p>iam (short for acute myocardial infarction)</p>
            </list-item>
            <list-item>
              <p>2014</p>
            </list-item>
            <list-item>
              <p>infarto (short for acute myocardial infarction)</p>
            </list-item>
            <list-item>
              <p>mm</p>
            </list-item>
            <list-item>
              <p>sf</p>
            </list-item>
          </list>
          <p>
            <bold>Dyslipidemia</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>dislipidemia (dyslipidemia)</p>
            </list-item>
            <list-item>
              <p>comorbidades (comorbidities)</p>
            </list-item>
            <list-item>
              <p>1hora</p>
            </list-item>
            <list-item>
              <p>cesária (cesarean)</p>
            </list-item>
            <list-item>
              <p>morbidades (morbidities)</p>
            </list-item>
            <list-item>
              <p>puerpera (puerperal)</p>
            </list-item>
            <list-item>
              <p>has (short for high blood pressure)</p>
            </list-item>
            <list-item>
              <p>fêmur (femur)</p>
            </list-item>
            <list-item>
              <p>tep</p>
            </list-item>
            <list-item>
              <p>previas (previous)</p>
            </list-item>
          </list>
          <p>
            <bold>Obesity</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>BMI (short for body mass index)</p>
            </list-item>
            <list-item>
              <p>obesidade (obesity)</p>
            </list-item>
            <list-item>
              <p>m²</p>
            </list-item>
            <list-item>
              <p>1994</p>
            </list-item>
            <list-item>
              <p>lipschitz</p>
            </list-item>
            <list-item>
              <p>eutrofia</p>
            </list-item>
            <list-item>
              <p>altura (height)</p>
            </list-item>
            <list-item>
              <p>peso (weight)</p>
            </list-item>
            <list-item>
              <p>estatura (stature)</p>
            </list-item>
            <list-item>
              <p>obesa (obese)</p>
            </list-item>
          </list>
          <p>
            <bold>National Institutes of Health Stroke Scale</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>nihss</p>
            </list-item>
            <list-item>
              <p>súbito (sudden)</p>
            </list-item>
            <list-item>
              <p>asistolia (asystole)</p>
            </list-item>
            <list-item>
              <p>sens</p>
            </list-item>
            <list-item>
              <p>territ</p>
            </list-item>
            <list-item>
              <p>suboclusiva (subocclusive)</p>
            </list-item>
            <list-item>
              <p>perg</p>
            </list-item>
            <list-item>
              <p>mecania (mechanical with misspelling in the original)</p>
            </list-item>
            <list-item>
              <p>severo (severe)</p>
            </list-item>
            <list-item>
              <p>visto (seen)</p>
            </list-item>
          </list>
          <p>
            <bold>Ability to communicate</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>afasia (aphasia)</p>
            </list-item>
            <list-item>
              <p>comunicativa (talkative)</p>
            </list-item>
            <list-item>
              <p>disartria (dysarthria)</p>
            </list-item>
            <list-item>
              <p>comunicativo (talkative)</p>
            </list-item>
            <list-item>
              <p>colóquio (colloquium)</p>
            </list-item>
            <list-item>
              <p>verbalizando (verbalizing)</p>
            </list-item>
            <list-item>
              <p>alerta (alert)</p>
            </list-item>
            <list-item>
              <p>verbaliza (verbalizes)</p>
            </list-item>
            <list-item>
              <p>expressão (expression)</p>
            </list-item>
            <list-item>
              <p>hemiparesia (hemiparesis)</p>
            </list-item>
          </list>
          <p>
            <bold>Ability to feed orally</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>vo (short for orally)</p>
            </list-item>
            <list-item>
              <p>sne (short for nasoenteral probe)</p>
            </list-item>
            <list-item>
              <p>dieta (diet)</p>
            </list-item>
            <list-item>
              <p>pastosa (pasty)</p>
            </list-item>
            <list-item>
              <p>gastrostomia (gastrostomy)</p>
            </list-item>
            <list-item>
              <p>enteral (enteral)</p>
            </list-item>
            <list-item>
              <p>aceitação (acceptance)</p>
            </list-item>
            <list-item>
              <p>semi (semi)</p>
            </list-item>
            <list-item>
              <p>exclusiva (exclusive)</p>
            </list-item>
            <list-item>
              <p>polimérica (polymeric diet)</p>
            </list-item>
          </list>
          <p>
            <bold>Death</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>óbito (death)</p>
            </list-item>
            <list-item>
              <p>constato (I’ve verified)</p>
            </list-item>
            <list-item>
              <p>leito (bed)</p>
            </list-item>
            <list-item>
              <p>ar (air)</p>
            </list-item>
            <list-item>
              <p>estável (stable)</p>
            </list-item>
            <list-item>
              <p>ambiente (environment or room)</p>
            </list-item>
            <list-item>
              <p>no</p>
            </list-item>
            <list-item>
              <p>doação (donation)</p>
            </list-item>
            <list-item>
              <p>obito (death with misspelling in the original)</p>
            </list-item>
            <list-item>
              <p>óbito (death with misspelling in the original)</p>
            </list-item>
          </list>
          <p>
            <bold>High blood pressure</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>has (short for high blood pressure)</p>
            </list-item>
            <list-item>
              <p>dm (short for diabetes)</p>
            </list-item>
            <list-item>
              <p>dislipidemia (dyslipidemia)</p>
            </list-item>
            <list-item>
              <p>dm2 (short for diabetes type 2)</p>
            </list-item>
            <list-item>
              <p>comorbidades (comorbidities)</p>
            </list-item>
            <list-item>
              <p>fa (short for atrial fibrillation)</p>
            </list-item>
            <list-item>
              <p>artrite (arthritis)</p>
            </list-item>
            <list-item>
              <p>definitivo (definitive)</p>
            </list-item>
            <list-item>
              <p>reumatoide (rheumatoid)</p>
            </list-item>
            <list-item>
              <p>demencial (dementia)</p>
            </list-item>
          </list>
          <p>
            <bold>Infection indication</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>afebril (afebrile)</p>
            </list-item>
            <list-item>
              <p>flogísticos (phlogistic)</p>
            </list-item>
            <list-item>
              <p>sinais (signs)</p>
            </list-item>
            <list-item>
              <p>cefuroxima (cefuroxime)</p>
            </list-item>
            <list-item>
              <p>inserção (insertion)</p>
            </list-item>
            <list-item>
              <p>tax</p>
            </list-item>
            <list-item>
              <p>klebsiella (klebsiella)</p>
            </list-item>
            <list-item>
              <p>d0 (short for day 0)</p>
            </list-item>
            <list-item>
              <p>atb (short for antibiotics)</p>
            </list-item>
            <list-item>
              <p>azitromicina (azithromycin)</p>
            </list-item>
          </list>
          <p>
            <bold>Modified Rankin Score</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>rankin</p>
            </list-item>
            <list-item>
              <p>mrankin</p>
            </list-item>
            <list-item>
              <p>demência (dementia)</p>
            </list-item>
            <list-item>
              <p>caminha (walks)</p>
            </list-item>
            <list-item>
              <p>corversa (talks)</p>
            </list-item>
            <list-item>
              <p>alimenta (feed)</p>
            </list-item>
            <list-item>
              <p>alzheimer</p>
            </list-item>
            <list-item>
              <p>aparentes (apparent)</p>
            </list-item>
            <list-item>
              <p>comer (eat)</p>
            </list-item>
            <list-item>
              <p>mrk (mrs with misspelling in the original)</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The study intended to recognize the path and opportunities that may be advanced in terms of the technological capacity to support the outcome measurement process for the stroke care pathway. Real-world sentences from ischemic stroke EMRs were used to develop automatic models using ML and NLP techniques. It was possible to identify that SVM+W+C and SVM+BoW were the most effective models to be used to classify characteristics of a patient and process of care based on the extraction of Brazilian-Portuguese free-text data from the EMRs of patients. Ontological rules were also effective in this task, and perhaps even more importantly, most of the best-performing tasks with the OWL and ML models did not coincide. This outcome opens up the opportunity to exploit such complementarities to improve the coverage of tasks when implementing a real solution for outcome management or even to improve the individual effectiveness of each alternative by means of ensemble techniques such as stacking [<xref ref-type="bibr" rid="ref82">82</xref>].</p>
        <p>One of the good practices that the literature has demonstrated to increase the success of ML algorithms applied to health care is the inclusion of a clinical background in the annotation process [<xref ref-type="bibr" rid="ref83">83</xref>]. The availability of training data is critical in obtaining good results, thus indicating that variations in clinical terms found in the clinical text could be specific to the type and source of clinical notes that may not have been captured in an available resource. The results from our feature importance analysis are consistent with other study results [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref68">68</xref>,<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref83">83</xref>-<xref ref-type="bibr" rid="ref85">85</xref>] concerning many clinical terms applied to multiple clinical concepts, although there are specific patterns based on semantic types that can help. In general, it is difficult to determine the correct concept when a clinical term normalizes to multiple concepts, and this issue can penalize the effectiveness of the model [<xref ref-type="bibr" rid="ref86">86</xref>,<xref ref-type="bibr" rid="ref87">87</xref>].</p>
        <p>Our effectiveness results agree with the literature [<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref88">88</xref>], in which a Macro-F1 score &#62;80% is considered a successful extraction of medical records. Even though there is still a need to cover more tasks related to ICHOM patient-reported outcome measures [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref74">74</xref>,<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref85">85</xref>], we hypothesized that these tasks comprise a feeling state, and the lack of normalization of data contained in EMRs may explain the fact that these task categories did not perform very well [<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref89">89</xref>]. Medical records related to baseline characteristics and care processes typically contain much more structured data (eg, numerical values for tasks) than medical patient-reported outcomes, which focus more on unstructured data [<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref90">90</xref>]. This issue has been explored in previous studies on EMR-based clinical quality measures [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref82">82</xref>], in which it is suggested that these kinds of data (for baseline characteristics and care-related processes) have the potential to be scaled in other clinical conditions, such as cardiovascular and endocrine conditions [<xref ref-type="bibr" rid="ref83">83</xref>].</p>
        <p>Previous studies have found various advantages of EMR compared with traditional paper records [<xref ref-type="bibr" rid="ref91">91</xref>]. However, as reported by Ausserhofer et al [<xref ref-type="bibr" rid="ref12">12</xref>], care workers do not find them useful for guaranteeing safe care and treatment because of the difficulty of tracking clinical and quality measures. The same authors have discussed the importance of having IT capability to track care workers’ documentation while increasing safety and quality of care. They emphasized that this approach is important for addressing EMR data collection issues that have been historically extracted via manual review by clinical experts, leading to scalability and cost issues [<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref85">85</xref>,<xref ref-type="bibr" rid="ref90">90</xref>]. In our study, it was possible to demonstrate that for the stroke care pathway, the use of ML models to measure clinical outcomes remains a challenge, but the technology has the potential to support the extraction of relevant patient characteristics and care-process information.</p>
        <p>Despite the challenges regarding the accuracy of the outcome measures, promising approaches regarding baseline characteristics and care-related process data have been achieved. This may be the first step toward unlocking the full potential of EMR data [<xref ref-type="bibr" rid="ref83">83</xref>]. Tracking baseline characteristics is useful to assist disease prevalence studies, identify opportunities to guide political decisions about the public health sector [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref93">93</xref>], automate the assessment of patient eligibility for clinical research [<xref ref-type="bibr" rid="ref84">84</xref>], and feed risk assessment tools [<xref ref-type="bibr" rid="ref94">94</xref>]. In turn, care-related process metrics boost the opportunity to improve decision-making with new technologies, maintain the effectiveness of treatments, and encourage alternative remuneration models [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref95">95</xref>].</p>
        <p>The next step would be to invest in the automation of tasks at the patient level that support the control of the progression of patients in real time during stroke episodes. In a similar manner, it would be useful to identify opportunities to improve the EMR data quality, such as the implementation of quality software with dynamic autocomplete backed by a register of normalized terms. The use of NLP for quality measures also adds to the capture of large amounts of clinical data from EMRs [<xref ref-type="bibr" rid="ref82">82</xref>]. The products of NLP and mixed methods pipelines could potentially impact a number of clinical areas and could facilitate appropriate care by feeding hospital outcome indicators and data to support epidemiological studies or value-based programs [<xref ref-type="bibr" rid="ref82">82</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study had several limitations. For clinical NLP method development to advance further globally and to become an integral part of clinical outcome research or have a natural place in clinical practice, there are still challenges ahead. Our work is based on the EMR of a single center, with a limited number of annotated patients. Thus, further work is needed to test this approach in EMRs from different centers with different patients, who may use different languages for clinical documentation. We had no access to data from exams or hospital indicators, which is the reason why our infection identification, for example, was based on any report of antibiotic use, typical symptoms of infection, or tests described. We were unable to find data samples that included all the risk factors that were discovered in the literature. It would be worth conducting a future study with a larger and different data set with more features to examine whether the findings of this research are still valid. Finally, the design focused on sentences can be significantly influenced by the NI data volume—if a patient smokes, this will probably be reflected in just one sentence, maybe two, and all of the other sentences will be NI. One possible approach would be to use hierarchy models to first classify whether a sentence is relevant and then evolve to classification algorithms to predict classes. Then, the entire record can inform the prediction of the outcome of patients, instead of saying whether a specific sentence indicates a task.</p>
        <p>Regarding the undersampling experiment, more intelligent strategies such as choosing the <italic>most positive of the negative samples</italic> or Tomek links [<xref ref-type="bibr" rid="ref81">81</xref>] should be tested for better effectiveness. We leave this for future work and suggest, for practical purposes, maintaining the original distribution until more effective strategies have been studied further.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study is innovative in that it considered many and diverse types of automatic classifiers (neural, nonneural, and ontological) using a large real-world data set containing thousands of textual sentences from real-world EMRs and a large number of tasks (n=24) with multiple classes using Brazilian-Portuguese unstructured free-text EMR databases. The effectiveness of these models demonstrated a better result when used to classify care processes and patient characteristics than patient-reported outcomes, which suggests that advances in intelligence in information technology for clinical outcomes are still a gap in the scalability of outcome measurements in health care. Future research should explore the development of mixed methods to increase task effectiveness. Advances in IT capacity have proved to be essential for the scalability and agility of the ability to measure health outcomes and how it reflects on its external validation to support health real-time quality measurement indicators.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Example of an evolution on the electronic medical record.</p>
        <media xlink:href="medinform_v9i11e29120_app1.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Example of the annotation process.</p>
        <media xlink:href="medinform_v9i11e29120_app2.docx" xlink:title="DOCX File , 19 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Data set characteristics.</p>
        <media xlink:href="medinform_v9i11e29120_app3.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Details of the automatic text classification methods.</p>
        <media xlink:href="medinform_v9i11e29120_app4.docx" xlink:title="DOCX File , 28 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Experimental procedure.</p>
        <media xlink:href="medinform_v9i11e29120_app5.png" xlink:title="PNG File , 97 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Experimental protocol details—specific parameter tuning.</p>
        <media xlink:href="medinform_v9i11e29120_app6.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Results of F1 score from the random undersampling experiment. BERT: bidirectional encoder representation from transformers; BoW: Bag-of-Words; KNN: K-nearest neighbor; mRS: Modified Rankin Score; NIHSS: National Institutes of Health Stroke Scale; SVM: support vector machine; TFIDF: term frequency-inverse document frequency; W+C: word- term frequency-inverse document frequency and character- term frequency-inverse document frequency.</p>
        <media xlink:href="medinform_v9i11e29120_app7.png" xlink:title="PNG File , 308 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representation from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BoW</term>
          <def>
            <p>Bag-of-Words</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">IT</term>
          <def>
            <p>information technology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">KNN</term>
          <def>
            <p>K-nearest neighbor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NIHSS</term>
          <def>
            <p>National Institutes of Health Stroke Scale</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">OWL</term>
          <def>
            <p>web ontology language</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">TFIDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>The authors disclose receipt of the following financial support for the research, authorship, and publication of this paper: this research was supported by the Coordenação de Aperfeiçoamento de Pessoal de Nível Superior–Brasil (CAPES)–Finance Code 001 and National Council for Scientific and Technological Development (CNPq 465518/2014-1 and others), Research Support Foundation of the State of Minas Gerais (FAPEMIG), Google, and NVIDIA Corporation.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>GBD 2016 Stroke Collaborators</collab>
          </person-group>
          <article-title>Global, regional, and national burden of stroke, 1990-2016: a systematic analysis for the Global Burden of Disease Study 2016</article-title>
          <source>Lancet Neurol</source>
          <year>2019</year>
          <month>05</month>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>439</fpage>
          <lpage>58</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1474-4422(19)30034-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S1474-4422(19)30034-1</pub-id>
          <pub-id pub-id-type="medline">30871944</pub-id>
          <pub-id pub-id-type="pii">S1474-4422(19)30034-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC6494974</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Findings From the Global Burden of Disease Study 2017</article-title>
          <source>Institute for Health Metrics and Evaluation (IHME)</source>
          <year>2018</year>
          <access-date>2021-10-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.healthdata.org/sites/default/files/files/policy_report/2019/GBD_2017_Booklet.pdf">http://www.healthdata.org/sites/default/files/files/policy_report/2019/GBD_2017_Booklet.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kiik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Peek</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Curcin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Rudd</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Douiri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wolfe</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of machine learning models for predicting outcomes of stroke with structured data</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <month>6</month>
          <day>12</day>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>e0234722</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0234722"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0234722</pub-id>
          <pub-id pub-id-type="medline">32530947</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-04723</pub-id>
          <pub-id pub-id-type="pmcid">PMC7292406</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kamal</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Machine learning in acute ischemic stroke neuroimaging</article-title>
          <source>Front Neurol</source>
          <year>2018</year>
          <month>11</month>
          <day>8</day>
          <volume>9</volume>
          <fpage>945</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fneur.2018.00945"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fneur.2018.00945</pub-id>
          <pub-id pub-id-type="medline">30467491</pub-id>
          <pub-id pub-id-type="pmcid">PMC6236025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Badgeley</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mocco</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Oermann</surname>
              <given-names>EK</given-names>
            </name>
          </person-group>
          <article-title>Deep learning guided stroke management: a review of clinical applications</article-title>
          <source>J Neurointerv Surg</source>
          <year>2018</year>
          <month>04</month>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>358</fpage>
          <lpage>62</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1136/neurintsurg-2017-013355"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/neurintsurg-2017-013355</pub-id>
          <pub-id pub-id-type="medline">28954825</pub-id>
          <pub-id pub-id-type="pii">neurintsurg-2017-013355</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Deep into the brain: artificial intelligence in stroke imaging</article-title>
          <source>J Stroke</source>
          <year>2017</year>
          <month>09</month>
          <volume>19</volume>
          <issue>3</issue>
          <fpage>277</fpage>
          <lpage>85</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.5853/jos.2017.02054"/>
          </comment>
          <pub-id pub-id-type="doi">10.5853/jos.2017.02054</pub-id>
          <pub-id pub-id-type="medline">29037014</pub-id>
          <pub-id pub-id-type="pii">jos-2017-02054</pub-id>
          <pub-id pub-id-type="pmcid">PMC5647643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wodchis</surname>
              <given-names>WP</given-names>
            </name>
            <name name-style="western">
              <surname>Austin</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>A 3-year study of high-cost users of health care</article-title>
          <source>Can Med Assoc J</source>
          <year>2016</year>
          <month>02</month>
          <day>16</day>
          <volume>188</volume>
          <issue>3</issue>
          <fpage>182</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cmaj.ca/cgi/pmidlookup?view=long&#38;pmid=26755672"/>
          </comment>
          <pub-id pub-id-type="doi">10.1503/cmaj.150064</pub-id>
          <pub-id pub-id-type="medline">26755672</pub-id>
          <pub-id pub-id-type="pii">cmaj.150064</pub-id>
          <pub-id pub-id-type="pmcid">PMC4754179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Markatou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Don</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sorrentino</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ebadollahi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Case-based reasoning in comparative effectiveness research</article-title>
          <source>IBM J Res Dev</source>
          <year>2012</year>
          <month>9</month>
          <volume>56</volume>
          <issue>5</issue>
          <fpage>4:1</fpage>
          <lpage>4:12</lpage>
          <pub-id pub-id-type="doi">10.1147/JRD.2012.2198311</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
          </person-group>
          <article-title>Prediction modeling using EHR data: challenges, strategies, and a comparison of machine learning approaches</article-title>
          <source>Med Care</source>
          <year>2010</year>
          <month>06</month>
          <volume>48</volume>
          <issue>6 Suppl</issue>
          <fpage>S106</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1097/MLR.0b013e3181de9e17</pub-id>
          <pub-id pub-id-type="medline">20473190</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chechulin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nazerian</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rais</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Malikov</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Predicting patients with high risk of becoming high-cost healthcare users in Ontario (Canada)</article-title>
          <source>Health Care Policy</source>
          <year>2014</year>
          <month>02</month>
          <day>26</day>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>68</fpage>
          <lpage>79</lpage>
          <pub-id pub-id-type="doi">10.12927/hcpol.2014.23710</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rotmensch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Halpern</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tlimat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Horng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Learning a health knowledge graph from electronic medical records</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <month>07</month>
          <day>20</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>5994</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/s41598-017-05778-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-017-05778-z</pub-id>
          <pub-id pub-id-type="medline">28729710</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-017-05778-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC5519723</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ausserhofer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Favez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zúñiga</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Electronic health record use in Swiss nursing homes and its association with implicit rationing of nursing care documentation: multicenter cross-sectional survey study</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>03</month>
          <day>02</day>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e22974</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/3/e22974/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22974</pub-id>
          <pub-id pub-id-type="medline">33650983</pub-id>
          <pub-id pub-id-type="pii">v9i3e22974</pub-id>
          <pub-id pub-id-type="pmcid">PMC7967228</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Casey</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>NE</given-names>
            </name>
          </person-group>
          <article-title>Using electronic health records for population health research: a review of methods and applications</article-title>
          <source>Annu Rev Public Health</source>
          <year>2016</year>
          <volume>37</volume>
          <fpage>61</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26667605"/>
          </comment>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-032315-021353</pub-id>
          <pub-id pub-id-type="medline">26667605</pub-id>
          <pub-id pub-id-type="pmcid">PMC6724703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernandes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alabsi</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Brenner</surname>
              <given-names>LN</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Collens</surname>
              <given-names>SI</given-names>
            </name>
            <name name-style="western">
              <surname>Leone</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Mukerji</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Westover</surname>
              <given-names>MB</given-names>
            </name>
          </person-group>
          <article-title>Classification of the disposition of patients hospitalized with COVID-19: reading discharge summaries using natural language processing</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>02</month>
          <day>10</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>e25457</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/2/e25457/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25457</pub-id>
          <pub-id pub-id-type="medline">33449908</pub-id>
          <pub-id pub-id-type="pii">v9i2e25457</pub-id>
          <pub-id pub-id-type="pmcid">PMC7879729</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Porter</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The strategy that will fix health care</article-title>
          <source>Harvard Business Review</source>
          <year>2013</year>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hbr.org/2013/10/the-strategy-that-will-fix-health-care">https://hbr.org/2013/10/the-strategy-that-will-fix-health-care</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golas</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Shibahara</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Agboola</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Otaki</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sato</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nakae</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hisamitsu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kojima</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Felsted</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kakarmath</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kvedar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jethwani</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A machine learning model to predict the risk of 30-day readmissions in patients with heart failure: a retrospective analysis of electronic medical records data</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2018</year>
          <month>06</month>
          <day>22</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>44</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-018-0620-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-018-0620-z</pub-id>
          <pub-id pub-id-type="medline">29929496</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-018-0620-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC6013959</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Glaser</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>It’s time for a new kind of electronic health record</article-title>
          <source>Harvard Business Review</source>
          <year>2020</year>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hbr.org/2020/06/its-time-for-a-new-kind-of-electronic-health-record">https://hbr.org/2020/06/its-time-for-a-new-kind-of-electronic-health-record</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carberry</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Landman</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Feeley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Henderson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fraser</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Incorporating longitudinal pediatric patient-centered outcome measurement into the clinical workflow using a commercial electronic health record: a step toward increasing value for the patient</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <month>01</month>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>88</fpage>
          <lpage>93</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26377989"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv125</pub-id>
          <pub-id pub-id-type="medline">26377989</pub-id>
          <pub-id pub-id-type="pii">ocv125</pub-id>
          <pub-id pub-id-type="pmcid">PMC7814927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Abram</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Arruda-Olson</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Identifying peripheral arterial disease cases using natural language processing of clinical notes</article-title>
          <source>IEEE EMBS Int Conf Biomed Health Inform</source>
          <year>2016</year>
          <month>02</month>
          <volume>2016</volume>
          <fpage>126</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28111640"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/BHI.2016.7455851</pub-id>
          <pub-id pub-id-type="medline">28111640</pub-id>
          <pub-id pub-id-type="pmcid">PMC5248569</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rolfes</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Seabright</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Voge</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bachman</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Kita</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Croghan</surname>
              <given-names>IT</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Juhn</surname>
              <given-names>YJ</given-names>
            </name>
          </person-group>
          <article-title>Application of a natural language processing algorithm to asthma ascertainment. An automated chart review</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2017</year>
          <month>08</month>
          <day>15</day>
          <volume>196</volume>
          <issue>4</issue>
          <fpage>430</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28375665"/>
          </comment>
          <pub-id pub-id-type="doi">10.1164/rccm.201610-2006OC</pub-id>
          <pub-id pub-id-type="medline">28375665</pub-id>
          <pub-id pub-id-type="pmcid">PMC5564673</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chase</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Mitrani</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>GG</given-names>
            </name>
            <name name-style="western">
              <surname>Fulgieri</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>Early recognition of multiple sclerosis using natural language processing of the electronic health record</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>02</month>
          <day>28</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0418-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0418-4</pub-id>
          <pub-id pub-id-type="medline">28241760</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0418-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC5329909</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garvin</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gobbel</surname>
              <given-names>GT</given-names>
            </name>
            <name name-style="western">
              <surname>Matheny</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Redd</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Heidenreich</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bolton</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Heavirland</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Reeves</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kalsy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Meystre</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Automating quality measures for heart failure using natural language processing: a descriptive study in the Department of Veterans Affairs</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>01</month>
          <day>15</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e5</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://medinform.jmir.org/2018/1/e5/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.9150</pub-id>
          <pub-id pub-id-type="medline">29335238</pub-id>
          <pub-id pub-id-type="pii">v6i1e5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5789165</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nekkantti</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jonnagaddala</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Family history information extraction with neural attention and an enhanced relation-side scheme: algorithm development and validation</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>12</month>
          <day>01</day>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e21750</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/12/e21750/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21750</pub-id>
          <pub-id pub-id-type="medline">33258777</pub-id>
          <pub-id pub-id-type="pii">v8i12e21750</pub-id>
          <pub-id pub-id-type="pmcid">PMC7738250</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>TH</given-names>
            </name>
          </person-group>
          <article-title>Putting the value framework to work</article-title>
          <source>N Engl J Med</source>
          <year>2010</year>
          <month>12</month>
          <day>23</day>
          <volume>363</volume>
          <issue>26</issue>
          <fpage>2481</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp1013111</pub-id>
          <pub-id pub-id-type="medline">21142527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blumenthal</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tavenner</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The "meaningful use" regulation for electronic health records</article-title>
          <source>N Engl J Med</source>
          <year>2010</year>
          <month>08</month>
          <day>05</day>
          <volume>363</volume>
          <issue>6</issue>
          <fpage>501</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp1006114</pub-id>
          <pub-id pub-id-type="medline">20647183</pub-id>
          <pub-id pub-id-type="pii">NEJMp1006114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Porter</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Larsson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>TH</given-names>
            </name>
          </person-group>
          <article-title>Standardizing patient outcomes measurement</article-title>
          <source>N Engl J Med</source>
          <year>2016</year>
          <month>02</month>
          <day>11</day>
          <volume>374</volume>
          <issue>6</issue>
          <fpage>504</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp1511701</pub-id>
          <pub-id pub-id-type="medline">26863351</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Hareendran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Baird</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>UG</given-names>
            </name>
            <name name-style="western">
              <surname>Muir</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Bone</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Improving the assessment of outcomes in stroke: use of a structured interview to assign grades on the modified Rankin Scale</article-title>
          <source>Stroke</source>
          <year>2002</year>
          <month>09</month>
          <volume>33</volume>
          <issue>9</issue>
          <fpage>2243</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1161/01.str.0000027437.22450.bd</pub-id>
          <pub-id pub-id-type="medline">12215594</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lyden</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Levine</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Brott</surname>
              <given-names>TG</given-names>
            </name>
            <name name-style="western">
              <surname>Broderick</surname>
              <given-names>J</given-names>
            </name>
            <collab>NINDS rtPA Stroke Study Group</collab>
          </person-group>
          <article-title>A modified National Institutes of Health Stroke Scale for use in stroke clinical trials: preliminary reliability and validity</article-title>
          <source>Stroke</source>
          <year>2001</year>
          <month>06</month>
          <volume>32</volume>
          <issue>6</issue>
          <fpage>1310</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1161/01.str.32.6.1310</pub-id>
          <pub-id pub-id-type="medline">11387492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caso</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Zakaria</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tomek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mikulik</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Martins</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rossouw</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Improving stroke care across the world: the ANGELS Initiative</article-title>
          <source>CNS - Oruen Ltd</source>
          <year>2018</year>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.oruen.com/wp-content/uploads/2018/12/Review-article-4.pdf">https://www.oruen.com/wp-content/uploads/2018/12/Review-article-4.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Honnibal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montani</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Industrial-strength natural language processing</article-title>
          <source>spaCy</source>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://spacy.io">https://spacy.io</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bugert</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Boullosa</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>de Castilho</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>The INCEpTION platform: machine-assisted and knowledge-oriented interactive annotation</article-title>
          <source>Proceedings of the 27th International Conference on Computational Linguistics: System Demonstrations</source>
          <year>2018</year>
          <month>08</month>
          <day>01</day>
          <conf-name>27th International Conference on Computational Linguistics: System Demonstrations</conf-name>
          <conf-date>August, 2018</conf-date>
          <conf-loc>Santa Fe, New Mexico</conf-loc>
          <fpage>5</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/C18-2002/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d18-2022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Schütze</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>Introduction to Information Retrieval</source>
          <year>2008</year>
          <publisher-loc>Cambridge</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
          <fpage>1</fpage>
          <lpage>506</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schütze</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>Foundations of Statistical Natural Language Processing</source>
          <year>1999</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
          <fpage>1</fpage>
          <lpage>720</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Viera</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Garrett</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Understanding interobserver agreement: the kappa statistic</article-title>
          <source>Fam Med</source>
          <year>2005</year>
          <month>05</month>
          <volume>37</volume>
          <issue>5</issue>
          <fpage>360</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.stfm.org/fmhub/fm2005/May/Anthony360.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">15883903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cunha</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Mangaravite</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Gomes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Canuto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Resende</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nascimento</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Viegas</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>França</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Martins</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Rosa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rocha</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>On the cost-effectiveness of neural and non-neural approaches and representations for text classification: a comprehensive comparative study</article-title>
          <source>Inf Process Manag</source>
          <year>2021</year>
          <month>05</month>
          <volume>58</volume>
          <issue>3</issue>
          <fpage>102481</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2020.102481</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Canuto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Salles</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rosa</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Couto</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Similarity-based synthetic document representations for meta-feature generation in text classification</article-title>
          <source>Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
          <year>2019</year>
          <month>01</month>
          <day>01</day>
          <conf-name>SIGIR '19: The 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval</conf-name>
          <conf-date>Jul 21-25, 2019</conf-date>
          <conf-loc>Paris, France</conf-loc>
          <fpage>355</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1145/3331184.3331239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Canuto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Salles</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rocha</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ramos</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>On efficient meta-level features for effective text classification</article-title>
          <source>Proceedings of the 23rd ACM International Conference on Information and Knowledge Management</source>
          <year>2014</year>
          <month>01</month>
          <day>01</day>
          <conf-name>CIKM '14: 2014 ACM Conference on Information and Knowledge Management</conf-name>
          <conf-date>Nov 3-7, 2014</conf-date>
          <conf-loc>Shanghai, China</conf-loc>
          <fpage>1709</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.1145/2661829.2662060</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Canuto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sousa</surname>
              <given-names>DX</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Rosa</surname>
              <given-names>TC</given-names>
            </name>
          </person-group>
          <article-title>A thorough evaluation of distance-based meta-features for automated text classification</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2018</year>
          <month>03</month>
          <day>27</day>
          <volume>30</volume>
          <issue>12</issue>
          <fpage>2242</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2018.2820051</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cunha</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Canuto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Viegas</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Salles</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gomes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mangaravite</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Resende</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rosa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Rocha</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Extended pre-processing pipeline for text classification: on the role of meta-feature representations, sparsification and selective sampling</article-title>
          <source>Inf Process Manag</source>
          <year>2020</year>
          <month>07</month>
          <volume>57</volume>
          <issue>4</issue>
          <fpage>102263</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2020.102263</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical information extraction applications: a literature review</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>01</month>
          <volume>77</volume>
          <fpage>34</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30256-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.11.011</pub-id>
          <pub-id pub-id-type="medline">29162496</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30256-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5771858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <month>10</month>
          <day>01</day>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kowsari</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Meimandi</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Heidarysafa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mendu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Text classification algorithms: a survey</article-title>
          <source>Information</source>
          <year>2019</year>
          <month>04</month>
          <day>23</day>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>150</fpage>
          <pub-id pub-id-type="doi">10.3390/info10040150</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>RR</given-names>
            </name>
          </person-group>
          <article-title>Introduction to information retrieval</article-title>
          <source>J Am Soc Inf Sci Technol</source>
          <year>2009</year>
          <month>10</month>
          <day>19</day>
          <volume>61</volume>
          <issue>4</issue>
          <fpage>852</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1002/asi.21234</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberg</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Darden</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Pedersen</surname>
              <given-names>LG</given-names>
            </name>
          </person-group>
          <article-title>Gene selection for sample classification based on gene expression data: study of sensitivity to choice of parameters of the GA/KNN method</article-title>
          <source>Bioinformatics</source>
          <year>2001</year>
          <month>12</month>
          <volume>17</volume>
          <issue>12</issue>
          <fpage>1131</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/17.12.1131</pub-id>
          <pub-id pub-id-type="medline">11751221</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Bax</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Uma visão geral sobre ontologias: pesquisa sobre definições, tipos, aplicações, métodos de avaliação e de construção</article-title>
          <source>Ci Inf</source>
          <year>2003</year>
          <month>12</month>
          <volume>32</volume>
          <issue>3</issue>
          <fpage>7</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1590/s0100-19652003000300002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Allahyari</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kochut</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Janik</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Ontology-based text classification into dynamically defined topics</article-title>
          <source>Proceedings of the IEEE International Conference on Semantic Computing</source>
          <year>2014</year>
          <month>01</month>
          <day>01</day>
          <conf-name>IEEE International Conference on Semantic Computing</conf-name>
          <conf-date>Jun 16-18, 2014</conf-date>
          <conf-loc>Newport Beach, CA, USA</conf-loc>
          <fpage>273</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1109/icsc.2014.51</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hsieh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using ontology-based text classification to assist job hazard analysis</article-title>
          <source>Adv Eng Inf</source>
          <year>2014</year>
          <month>10</month>
          <volume>28</volume>
          <issue>4</issue>
          <fpage>381</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1016/j.aei.2014.05.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garla</surname>
              <given-names>VN</given-names>
            </name>
            <name name-style="western">
              <surname>Brandt</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Ontology-guided feature engineering for clinical text classification</article-title>
          <source>J Biomed Inform</source>
          <year>2012</year>
          <month>10</month>
          <volume>45</volume>
          <issue>5</issue>
          <fpage>992</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(12)00063-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2012.04.010</pub-id>
          <pub-id pub-id-type="medline">22580178</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(12)00063-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC3431438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>McKay</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Abbass</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Barlow</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A comparative study for domain ontology guided feature extraction</article-title>
          <source>Australian Computer Society</source>
          <year>2003</year>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.13.3384&#38;rep=rep1&#38;type=pdf">http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.13.3384&#38;rep=rep1&#38;type=pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Buckley</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Term-weighting approaches in automatic text retrieval</article-title>
          <source>Inf Process Manag</source>
          <year>1988</year>
          <month>01</month>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>513</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1016/0306-4573(88)90021-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Andrade</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Combining representations for effective citation classification</article-title>
          <source>Proceedings of The International Workshop on Mining Scientific Publications</source>
          <year>2020</year>
          <conf-name>The International Workshop on Mining Scientific Publications</conf-name>
          <conf-date>Aug 2020</conf-date>
          <conf-loc>Wuhan, China</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Woloszyn</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Barone</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>When, where, who, what or why? A hybrid model to question answering systems</article-title>
          <source>Computational Processing of the Portuguese Language</source>
          <year>2018</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Viegas</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rocha</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Resende</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Salles</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Martins</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Freitas</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Exploiting efficient and effective lazy Semi-Bayesian strategies for text classification</article-title>
          <source>Neurocomput</source>
          <year>2018</year>
          <month>09</month>
          <day>13</day>
          <volume>307</volume>
          <fpage>153</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1016/j.neucom.2018.04.033</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fei</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Simultaneous Support Vector selection and parameter optimization using Support Vector Machines for sentiment classification</article-title>
          <source>Proceedings of the 2016 7th IEEE International Conference on Software Engineering and Service Science (ICSESS)</source>
          <year>2016</year>
          <conf-name>2016 7th IEEE International Conference on Software Engineering and Service Science (ICSESS)</conf-name>
          <conf-date>Aug 26-28, 2016</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ICSESS.2016.7883015</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Selection incentives in a performance-based contracting system</article-title>
          <source>Health Serv Res</source>
          <year>2003</year>
          <month>04</month>
          <volume>38</volume>
          <issue>2</issue>
          <fpage>535</fpage>
          <lpage>52</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/12785560"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/1475-6773.00132</pub-id>
          <pub-id pub-id-type="medline">12785560</pub-id>
          <pub-id pub-id-type="pmcid">PMC1360913</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Georgakopoulos</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Tasoulis</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Vrahatis</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Plagianakos</surname>
              <given-names>VP</given-names>
            </name>
          </person-group>
          <article-title>Convolutional neural networks for toxic comment classification</article-title>
          <source>Proceedings of the 10th Hellenic Conference on Artificial Intelligence</source>
          <year>2018</year>
          <conf-name>SETN '18: 10th Hellenic Conference on Artificial Intelligence</conf-name>
          <conf-date>Jul 9-12, 2018</conf-date>
          <conf-loc>Patras, Greece</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3200947.3208069</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Deep pyramid convolutional neural networks for text categorization</article-title>
          <source>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</source>
          <year>2017</year>
          <conf-name>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>Jul 30 - Aug 4, 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/P17-1052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M-W</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2019</year>
          <conf-name>Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>Jun 2019</conf-date>
          <conf-loc>Minneapolis, Minnesota</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gómez-Pérez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Corcho</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Fernández-López</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Ontological Engineering: With Examples from the Areas of Knowledge Management, E-Commerce and the Semantic Web</source>
          <year>2004</year>
          <publisher-loc>London</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Karypis</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Centroid-based document classification: analysis and experimental results</article-title>
          <source>Principles of Data Mining and Knowledge Discovery</source>
          <year>2000</year>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manevitz</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Yousef</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>One-class SVMs for document classification</article-title>
          <source>J Mach Learn Res</source>
          <year>2002</year>
          <month>01</month>
          <day>03</day>
          <volume>2</volume>
          <fpage>139</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.5555/944790.944808</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Layeghian Javan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sepehri</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Aghajani</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Toward analyzing and synthesizing previous research in early prediction of cardiac arrest using machine learning based on a multi-layered integrative framework</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>12</month>
          <volume>88</volume>
          <fpage>70</fpage>
          <lpage>89</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30203-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.10.008</pub-id>
          <pub-id pub-id-type="medline">30389440</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30203-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salles</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rodrigues</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rocha</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Improving random forests by neighborhood projection for effective text classification</article-title>
          <source>Inf Syst</source>
          <year>2018</year>
          <month>09</month>
          <volume>77</volume>
          <fpage>1</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1016/j.is.2018.05.006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cawley</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Talbot</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>On over-fitting in model selection and subsequent selection bias in performance evaluation</article-title>
          <source>J Mach Learn Res</source>
          <year>2010</year>
          <volume>11</volume>
          <fpage>2079</fpage>
          <lpage>107</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume11/cawley10a/cawley10a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Suominen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liakata</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Morley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Osborn</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hayes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Downs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Dutta</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Using clinical Natural Language Processing for health outcomes research: overview and actionable suggestions for future advances</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>12</month>
          <volume>88</volume>
          <fpage>11</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30201-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.10.005</pub-id>
          <pub-id pub-id-type="medline">30368002</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30201-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6986921</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saito</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rehmsmeier</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <month>03</month>
          <day>04</day>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>e0118432</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0118432"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0118432</pub-id>
          <pub-id pub-id-type="medline">25738806</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-26790</pub-id>
          <pub-id pub-id-type="pmcid">PMC4349800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zar</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <source>Biostatistical Analysis, 5th Edition</source>
          <year>2010</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>Pearson</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reys</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Severo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pedro</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>de Sousa e Sá</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Salgado</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Cerri</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Prati</surname>
              <given-names>RC</given-names>
            </name>
          </person-group>
          <article-title>Predicting multiple ICD-10 codes from Brazilian-Portuguese clinical notes</article-title>
          <source>Intelligent Systems</source>
          <year>2020</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Federated learning on clinical benchmark data: performance assessment</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>26</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e20891</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e20891/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20891</pub-id>
          <pub-id pub-id-type="medline">33104011</pub-id>
          <pub-id pub-id-type="pii">v22i10e20891</pub-id>
          <pub-id pub-id-type="pmcid">PMC7652692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kate</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Clinical term normalization using learned edit patterns and subconcept matching: system development and evaluation</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>01</month>
          <day>14</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e23104</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/1/e23104/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23104</pub-id>
          <pub-id pub-id-type="medline">33443483</pub-id>
          <pub-id pub-id-type="pii">v9i1e23104</pub-id>
          <pub-id pub-id-type="pmcid">PMC7843202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Yetisgen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderwende</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Horvitz</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Predicting severe clinical events by learning about life-saving actions and outcomes using distant supervision</article-title>
          <source>J Biomed Inform</source>
          <year>2020</year>
          <month>07</month>
          <volume>107</volume>
          <fpage>103425</fpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2020.103425</pub-id>
          <pub-id pub-id-type="medline">32348850</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(20)30053-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhpour</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sarram</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Gharaghani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chahooki</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>A survey on semi-supervised feature selection methods</article-title>
          <source>Pattern Recognit</source>
          <year>2017</year>
          <month>04</month>
          <volume>64</volume>
          <fpage>141</fpage>
          <lpage>58</lpage>
          <pub-id pub-id-type="doi">10.1016/j.patcog.2016.11.003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Diao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>An application of machine learning to etiological diagnosis of secondary hypertension: retrospective study using electronic medical records</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>01</month>
          <day>25</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e19739</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/1/e19739/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19739</pub-id>
          <pub-id pub-id-type="medline">33492233</pub-id>
          <pub-id pub-id-type="pii">v9i1e19739</pub-id>
          <pub-id pub-id-type="pmcid">PMC7870351</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>A stroke risk detection: improving hybrid feature selection method</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>04</month>
          <day>02</day>
          <volume>21</volume>
          <issue>4</issue>
          <fpage>e12437</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/4/e12437/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12437</pub-id>
          <pub-id pub-id-type="medline">30938684</pub-id>
          <pub-id pub-id-type="pii">v21i4e12437</pub-id>
          <pub-id pub-id-type="pmcid">PMC6466481</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lemaître</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Aridas</surname>
              <given-names>CK</given-names>
            </name>
          </person-group>
          <article-title>Imbalanced-learn: a Python toolbox to tackle the curse of imbalanced datasets in machine learning</article-title>
          <source>J Mach Learn Res</source>
          <year>2017</year>
          <month>01</month>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>559</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.5555/3122009.3122026</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kogan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Twyman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Heap</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Milentijevic</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Alberts</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Assessing stroke severity using electronic health record data: a machine learning approach</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <month>01</month>
          <day>08</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>8</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-1010-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-1010-x</pub-id>
          <pub-id pub-id-type="medline">31914991</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-1010-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC6950922</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="web">
          <article-title>Healthcare Improvement - Patient-Reported Outcomes</article-title>
          <source>ICHOM</source>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ichom.org/">https://www.ichom.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Barrett</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nordan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Spaulding</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Karney</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Lessons from Mayo Clinic’s redesign of stroke care</article-title>
          <source>Harvard Business Review</source>
          <year>2018</year>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hbr.org/2018/10/lessons-from-mayo-clinics-redesign-of-stroke-care">https://hbr.org/2018/10/lessons-from-mayo-clinics-redesign-of-stroke-care</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feigin</surname>
              <given-names>VL</given-names>
            </name>
            <name name-style="western">
              <surname>Krishnamurthi</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Stroke is largely preventable across the globe: where to next?</article-title>
          <source>Lancet</source>
          <year>2016</year>
          <month>08</month>
          <day>20</day>
          <volume>388</volume>
          <issue>10046</issue>
          <fpage>733</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(16)30679-1</pub-id>
          <pub-id pub-id-type="medline">27431357</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(16)30679-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>El-Gohary</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Ontology-based multilabel text classification of construction regulatory documents</article-title>
          <source>J Comput Civ Eng</source>
          <year>2015</year>
          <month>09</month>
          <volume>30</volume>
          <issue>4</issue>
          <fpage>04015058</fpage>
          <pub-id pub-id-type="doi">10.1061/(asce)cp.1943-5487.0000530</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Maimon</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Rokach</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Data mining for imbalanced datasets: an overview</article-title>
          <source>Data Mining and Knowledge Discovery Handbook</source>
          <year>2010</year>
          <publisher-loc>US</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiskopf</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Woodcock</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dorr</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Cigarroa</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>A mixed methods task analysis of the implementation and validation of EHR-based clinical quality measures</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2017</year>
          <month>02</month>
          <day>10</day>
          <volume>2016</volume>
          <fpage>1229</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28269920"/>
          </comment>
          <pub-id pub-id-type="medline">28269920</pub-id>
          <pub-id pub-id-type="pmcid">PMC5333295</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhalishahi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rinaldi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>04</month>
          <day>27</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12239</pub-id>
          <pub-id pub-id-type="medline">31066697</pub-id>
          <pub-id pub-id-type="pii">v7i2e12239</pub-id>
          <pub-id pub-id-type="pmcid">PMC6528438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kurian</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Caswell-Jin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sledge</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tamang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using natural language processing to construct a metastatic breast cancer cohort from linked cancer registry and electronic medical records data</article-title>
          <source>JAMIA Open</source>
          <year>2019</year>
          <month>09</month>
          <day>18</day>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>528</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32025650"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooz040</pub-id>
          <pub-id pub-id-type="medline">32025650</pub-id>
          <pub-id pub-id-type="pii">ooz040</pub-id>
          <pub-id pub-id-type="pmcid">PMC6994019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Fischer</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Use of electronic healthcare records to identify complex patients with atrial fibrillation for targeted intervention</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>03</month>
          <day>01</day>
          <volume>24</volume>
          <issue>2</issue>
          <fpage>339</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw082</pub-id>
          <pub-id pub-id-type="medline">27375290</pub-id>
          <pub-id pub-id-type="pii">ocw082</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shamsuddin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ralescu</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Classification with class imbalance problem: a review</article-title>
          <source>Int J Advance Soft Compu Appl</source>
          <year>2013</year>
          <month>11</month>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>176</fpage>
          <lpage>204</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/288228469_Classification_with_class_imbalance_problem_A_review"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>A learning method for the class imbalance problem with medical data sets</article-title>
          <source>Comput Biol Med</source>
          <year>2010</year>
          <month>05</month>
          <volume>40</volume>
          <issue>5</issue>
          <fpage>509</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2010.03.005</pub-id>
          <pub-id pub-id-type="medline">20347072</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(10)00040-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Geng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cong</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Model-based reasoning of clinical diagnosis in integrative medicine: real-world methodological study of electronic medical records and natural language processing methods</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>12</month>
          <day>21</day>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e23082</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/12/e23082/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23082</pub-id>
          <pub-id pub-id-type="medline">33346740</pub-id>
          <pub-id pub-id-type="pii">v8i12e23082</pub-id>
          <pub-id pub-id-type="pmcid">PMC7781803</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref89">
        <label>89</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ridgway</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Uvin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schmitt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Oliwa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Almirol</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes to identify mental illness and substance use among people living with HIV: retrospective cohort study</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>03</month>
          <day>10</day>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e23456</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/3/e23456/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23456</pub-id>
          <pub-id pub-id-type="medline">33688848</pub-id>
          <pub-id pub-id-type="pii">v9i3e23456</pub-id>
          <pub-id pub-id-type="pmcid">PMC7991991</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref90">
        <label>90</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Ananthakrishnan</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cagan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>VS</given-names>
            </name>
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Agniel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Plenge</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Methods to develop an electronic medical record phenotype algorithm to compare the risk of coronary artery disease across 3 chronic disease cohorts</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <month>08</month>
          <day>24</day>
          <volume>10</volume>
          <issue>8</issue>
          <fpage>e0136651</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0136651"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0136651</pub-id>
          <pub-id pub-id-type="medline">26301417</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-43453</pub-id>
          <pub-id pub-id-type="pmcid">PMC4547801</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref91">
        <label>91</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kruse</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Mileski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alaytsev</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Carol</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Adoption factors associated with electronic health record among long-term care facilities: a systematic review</article-title>
          <source>BMJ Open</source>
          <year>2015</year>
          <month>01</month>
          <day>28</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>e006615</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&amp;pmid=25631311"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2014-006615</pub-id>
          <pub-id pub-id-type="medline">25631311</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2014-006615</pub-id>
          <pub-id pub-id-type="pmcid">PMC4316426</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref92">
        <label>92</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beam</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>IS</given-names>
            </name>
          </person-group>
          <article-title>Big data and machine learning in health care</article-title>
          <source>JAMA</source>
          <year>2018</year>
          <month>04</month>
          <day>03</day>
          <volume>319</volume>
          <issue>13</issue>
          <fpage>1317</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2017.18391</pub-id>
          <pub-id pub-id-type="medline">29532063</pub-id>
          <pub-id pub-id-type="pii">2675024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref93">
        <label>93</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bugnon</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Geissbuhler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bischoff</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bonnabry</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>von Plessen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Improving primary care medication processes by using shared electronic medication plans in Switzerland: lessons learned from a participatory action research study</article-title>
          <source>JMIR Form Res</source>
          <year>2021</year>
          <month>01</month>
          <day>07</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>e22319</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2021/1/e22319/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22319</pub-id>
          <pub-id pub-id-type="medline">33410753</pub-id>
          <pub-id pub-id-type="pii">v5i1e22319</pub-id>
          <pub-id pub-id-type="pmcid">PMC7819781</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref94">
        <label>94</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nakatani</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nakao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Uchiyama</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Toyoshiba</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ochiai</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Predicting inpatient falls using natural language processing of nursing records obtained from Japanese electronic medical records: case-control study</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>04</month>
          <day>22</day>
          <volume>8</volume>
          <issue>4</issue>
          <fpage>e16970</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/4/e16970/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16970</pub-id>
          <pub-id pub-id-type="medline">32319959</pub-id>
          <pub-id pub-id-type="pii">v8i4e16970</pub-id>
          <pub-id pub-id-type="pmcid">PMC7203618</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref95">
        <label>95</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dafny</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Health care needs real competition</article-title>
          <source>Harvard Business Review (Competitive Strategy)</source>
          <year>2016</year>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hbr.org/2016/12/health-care-needs-real-competition">https://hbr.org/2016/12/health-care-needs-real-competition</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
