<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i2e29803</article-id>
      <article-id pub-id-type="pmid">35200154</article-id>
      <article-id pub-id-type="doi">10.2196/29803</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Identification of Prediabetes Discussions in Unstructured Clinical Documentation: Validation of a Natural Language Processing Algorithm</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Peeples</surname>
            <given-names>Malinda</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Burns</surname>
            <given-names>Michael</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Elbattah</surname>
            <given-names>Mahmoud</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Serban</surname>
            <given-names>Ovidiu</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Schwartz</surname>
            <given-names>Jessica L</given-names>
          </name>
          <degrees>MD, MHS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Division of General Internal Medicine</institution>
            <institution>Johns Hopkins School of Medicine</institution>
            <addr-line>2024 E Monument St.</addr-line>
            <addr-line>Ste 2-604D</addr-line>
            <addr-line>Baltimore, MD, 21205</addr-line>
            <country>United States</country>
            <fax>1 410 955 0476</fax>
            <phone>1 973 722 8552</phone>
            <email>jschwa64@jhmi.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3326-8989</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Tseng</surname>
            <given-names>Eva</given-names>
          </name>
          <degrees>MD, MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4001-2869</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Maruthur</surname>
            <given-names>Nisa M</given-names>
          </name>
          <degrees>MD, MHS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5799-104X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Rouhizadeh</surname>
            <given-names>Masoud</given-names>
          </name>
          <degrees>MS, PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9006-6112</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Division of General Internal Medicine</institution>
        <institution>Johns Hopkins School of Medicine</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Division of Hospital Medicine</institution>
        <institution>Johns Hopkins Hospital</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Welch Center for Prevention, Epidemiology, &#38; Clinical Research</institution>
        <institution>Johns Hopkins University</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Epidemiology</institution>
        <institution>Johns Hopkins University Bloomberg School of Public Health</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Pharmaceutical Outcomes and Policy</institution>
        <institution>University of Florida College of Pharmacy</institution>
        <addr-line>Gainesville, FL</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Division of Biomedical Informatics and Data Science</institution>
        <institution>Johns Hopkins University School of Medicine</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jessica L Schwartz <email>jschwa64@jhmi.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>2</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>24</day>
        <month>2</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>2</issue>
      <elocation-id>e29803</elocation-id>
      <history>
        <date date-type="received">
          <day>21</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>23</day>
          <month>9</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>15</day>
          <month>11</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>4</day>
          <month>12</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Jessica L Schwartz, Eva Tseng, Nisa M Maruthur, Masoud Rouhizadeh. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 24.02.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/2/e29803" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Prediabetes affects 1 in 3 US adults. Most are not receiving evidence-based interventions, so understanding how providers discuss prediabetes with patients will inform how to improve their care.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to develop a natural language processing (NLP) algorithm using machine learning techniques to identify discussions of prediabetes in narrative documentation.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We developed and applied a keyword search strategy to identify discussions of prediabetes in clinical documentation for patients with prediabetes. We manually reviewed matching notes to determine which represented actual prediabetes discussions. We applied 7 machine learning models against our manual annotation.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Machine learning classifiers were able to achieve classification results that were close to human performance with up to 98% precision and recall to identify prediabetes discussions in clinical documentation.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We demonstrated that prediabetes discussions can be accurately identified using an NLP algorithm. This approach can be used to understand and identify prediabetes management practices in primary care, thereby informing interventions to improve guideline-concordant care.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>prediabetes</kwd>
        <kwd>prediabetes discussions</kwd>
        <kwd>prediabetes management</kwd>
        <kwd>chronic disease management</kwd>
        <kwd>physician-patient communication</kwd>
        <kwd>natural language processing</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Prediabetes affects 88 million US adults [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>], and evidence-based interventions focusing on lifestyle modification can prevent type 2 diabetes [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. In particular, the Diabetes Prevention Program is an effective lifestyle intervention that decreases diabetes incidence, with the most recent data showing a 27% risk reduction compared with the placebo arm over 15 years of follow-up [<xref ref-type="bibr" rid="ref5">5</xref>]. Unfortunately, up to 89% of patients do not know they have prediabetes [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], and many patients are unaware of interventions to decrease their risk of diabetes—relying on their primary care providers (PCPs) to initiate discussions about diabetes prevention, including the importance of lifestyle changes [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. However, survey data demonstrate that many providers feel that they lack the resources to effectively implement evidence-based prediabetes treatment [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Focused primary care interventions to support decision-making and education may be able to improve diagnosis of prediabetes and delivery of guideline-concordant care.</p>
      <p>Rigorous quality improvement interventions require evaluation using measurement before and after implementation of a project to determine whether there is a demonstrable change in target outcomes. Unfortunately, it is difficult to identify changes and improvement in prediabetes management through structured data alone. Relying on diagnosis codes is insufficient; one study showed that only 13% of patients with prediabetes had an International Classification of Diseases (ICD)-9 diagnosis of prediabetes or hyperglycemia [<xref ref-type="bibr" rid="ref14">14</xref>]. Although labs, orders, and referrals provide some insight, this information lacks detail about management, particularly lifestyle counseling, which is better captured in narrative documentation. This content is not easily queried and requires innovative research methods to accurately reflect delivery of prediabetes care.</p>
      <p>Prior studies have shown that natural language processing (NLP) can be used to diagnose chronic conditions, like diabetes, but few focus on disease management [<xref ref-type="bibr" rid="ref15">15</xref>]. Similarly, NLP studies in prediabetes have primarily focused on disease detection, screening, and predictive modeling, with no studies applying machine learning (ML) techniques to determine how prediabetes is managed [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. Our goal was to develop a method to identify when providers discuss prediabetes management and treatment, which could later be used to determine if care delivered meets evidence-based guidelines and compare outcomes before and after an intervention. Therefore, we developed and validated NLP pipelines to identify primary care discussions about prediabetes in clinical documentation.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Population and Ethics Approval</title>
        <p>We identified patients with prediabetes who had an internal medicine primary care visit within an academic center with multiple ambulatory locations in Maryland and Washington, DC. Eligible patients were adults (≥18 years old) covered by 1 of 3 major insurers who completed an in-person visit and had a hemoglobin A<sub>1c</sub> (HbA<sub>1c</sub>) level between 5.7% and 6.4% between July 1, 2016 and December 31, 2018. Patients with diabetes (any type) based on billing codes or documentation in the problem list or past medical history were excluded. Data cleaning and analyses were performed using Stata 15. This study was approved by the Johns Hopkins Institutional Review Board (IRB00196984).</p>
      </sec>
      <sec>
        <title>Keyword Search Refinement (Phase 1)</title>
        <p>Based on clinical experience, we developed a list of keywords used to describe “prediabetes” (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). We identified visit notes containing these keywords using Python string matching and dictionary look-up, accounting for variations like spelling errors and morphological differences. We extracted a ±25-word concordance window (“note snippet”) for each match to provide textual context. Multiple snippets could come from the same note if multiple matching keywords were present.</p>
        <p>We selected 2 ambulatory clinics from our overall population. Of 315 patients meeting inclusion criteria, 40.6% (128/315) had at least one matching keyword during the study period. These patients had a total of 637 keyword matches across 324 encounters with 25 providers. We conducted manual annotation to determine which of the 637 note snippets represented true clinical discussions of prediabetes (yes or no). Outpatient provider documentation typically includes chief complaint, history of present illness, medical and family history, objective data including physical exam, and an assessment and plan. We considered use of a section identification pipeline to exclude specific sections of the notes (eg, past medical history) in which keywords would not represent prediabetes discussions. However, section identification pipelines are less generalizable, and the providers in our sample did not use standardized templates, making section boundaries difficult to define [<xref ref-type="bibr" rid="ref28">28</xref>]. Instead, note snippets were designated “no” during manual review if the keyword was only present in past medical history, a list of diagnoses outside of the assessment and plan, family history, or the description of a lab result.</p>
        <p>We double-reviewed a random sample of 200 note snippets. Interrater reliability (IRR) was 95%. Discrepancies between annotators were resolved via consensus to refine the definition of “prediabetes discussion.” We then manually reviewed patient records for 35.3% (66/187) of charts without a keyword match to identify false negatives. We reviewed all notes written by the patient’s PCP within the inclusion timeframe, and 9% (6/66) of patients had prediabetes discussions that were not captured. We added 3 keywords (“dysglycemia,” “hyperglycemia,” and “pre diabetes”) to the lexicon (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Training Set (Phase 2)</title>
        <p>We developed a training set to test our prediabetes lexicon against patients from clinics not included in phase 1 (<xref rid="figure1" ref-type="fig">Figure 1</xref>). We included a single note per patient (n=1095), choosing the first encounter after the HbA<sub>1c</sub> result that met inclusion criteria. We applied the finalized keyword search, which resulted in 684 matches for 381 patients seen by 73 providers. We abstracted the 684 note snippets and annotated the notes using a similar process as above. We double-reviewed 34% of the note snippets with an IRR of 97% for manual annotation, resolving to 100% agreement upon review. We combined these results with note snippets from phase 1. To avoid overselection of a single patient or provider, we included note snippets from 1 encounter per patient for a total of 930 note snippets written by 96 unique providers.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Diagram depicting selection and review during keyword search refinement (Phase 1) and training set development (Phase 2). Eligible patients were adults (≥18 years old) covered by 1 of 3 major insurers who completed an in-person visit at a Johns Hopkins clinic and had an HbA<sub>1c</sub> level between 5.7% and 6.4% (39-46 mmol/mol) between July 1, 2016 and December 31, 2018. Note, double review indicates that 2 providers reviewed the keyword matches to identify whether the surrounding text represented a true prediabetes discussion.</p>
          </caption>
          <graphic xlink:href="medinform_v10i2e29803_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Rule-Based System</title>
        <p>Rule-based systems are frequently used for clinical concept extraction and text classification because of their ease of implementation and minimal computational requirements. To establish a strong baseline, we tested the feasibility of identifying prediabetes discussions with a rule-based classification scheme. Using the spaCy EntityRuler module [<xref ref-type="bibr" rid="ref29">29</xref>], we created 42 expert-developed patterns that, if present, would represent prediabetes discussions. The spaCy EntityRuler module facilitates various pattern, keyword, and regular expression searching and matching and allows us to account for morphological variations (eg, singular vs plural forms, conjunctions), as well as substitutions of different prepositions (eg, about vs for) and synonyms (eg, prediabetes, impaired fasting glucose). Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provides our expert-developed patterns for this rule-based system. We randomly sampled 90% of the note snippets to develop and revise the rule-based system and evaluated the system on the remaining 10%.</p>
      </sec>
      <sec>
        <title>Machine Learning</title>
        <sec>
          <title>Feature Selection</title>
          <p>Note snippets from the training set were stemmed using the Porter stemmer, and common stop words were removed using the Natural Language Toolkit (NLTK) stop word list [<xref ref-type="bibr" rid="ref30">30</xref>]. We used the Python scikit-learn library [<xref ref-type="bibr" rid="ref31">31</xref>] to extract word n-gram sequences (1- to 5-grams), weighted by term frequency-inverse document frequency (TF-IDF) [<xref ref-type="bibr" rid="ref32">32</xref>]. We applied logistic regression with L1 regularization [<xref ref-type="bibr" rid="ref33">33</xref>] to reduce the dimensionality of the feature vectors.</p>
        </sec>
        <sec>
          <title>Computational Environment</title>
          <p>Deep learning and ML experiments were conducted on the Johns Hopkins University (JHU) Precision Medicine Analytics Platform (PMAP), a high-performance, cloud-based, big-data platform to accelerate biomedical discovery and translate discovered knowledge to improve patient-centered care. PMAP pulls data from the Johns Hopkins Medicine electronic health record (EHR) to support processing by ML and NLP technologies. Statistical analysis and manual annotation were done in the JHU Secure Analytic Framework Environment, a virtual desktop that provides JHU investigators with a secure platform for analyzing and sharing sensitive data (including protected health information) with colleagues.</p>
        </sec>
        <sec>
          <title>Classification</title>
          <p>We used the labeled note snippets to train multiple ML classifiers to replicate human annotation for prediabetes discussions. We applied 6 binary classification models: logistic regression [<xref ref-type="bibr" rid="ref34">34</xref>], linear support vector machines (SVM) [<xref ref-type="bibr" rid="ref35">35</xref>], stochastic gradient descent (SGD) [<xref ref-type="bibr" rid="ref36">36</xref>], decision tree [<xref ref-type="bibr" rid="ref37">37</xref>], random forest [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], and Gaussian naïve Bayes (NB) [<xref ref-type="bibr" rid="ref40">40</xref>]. To reduce overfitting, each model was evaluated using 10-fold cross-validation by training, randomly, on 90% of the data and holding out 10% for testing. All modeling was performed in scikit-learn [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
          <p>We also applied convolutional neural networks (CNNs) for sentence categorization [<xref ref-type="bibr" rid="ref41">41</xref>], a well-established deep learning method in NLP for text classification [<xref ref-type="bibr" rid="ref42">42</xref>] using the Python spaCy 2.1 implementation [<xref ref-type="bibr" rid="ref29">29</xref>]. We started by tokenizing each note snippet and creating an embedding vector for each token using scispaCy large models (~785,000 vocabulary and 600,000 word vectors), pretrained on biomedical and clinical text [<xref ref-type="bibr" rid="ref43">43</xref>]. Next, to represent the tokens in context, these vectors were encoded into a sentence matrix by computing the vector for each token using a forward pass and a backward pass. After that, a self-attention mechanism was applied to reduce the dimensionality of the sentence matrix representation into a single context vector. Finally, these vectors were average-pooled and used as features in a simple feed-forward network for predicting true discussions of prediabetes. For the CNN model, we used the spaCy 2.2 default network architecture and parameters [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
          <p>For each classification method, we reported on agreement, sensitivity and recall, specificity, positive predictive value and precision, and F measure using manual annotation as the gold standard. To test statistical significance between classification methods, we used the MLxtend Python library to perform a 5x2 cross-validation paired <italic>t</italic> test [<xref ref-type="bibr" rid="ref45">45</xref>]. A <italic>P</italic> value &#60;.05 indicated that we could reject the null hypothesis that both models performed equally to classify prediabetes discussions.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We identified 1410 patients with prediabetes; 518 (36.74%) had at least one keyword match. Among these patients, 435 (84.0%) had a true discussion about prediabetes in the manually reviewed documents (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
      <p>The rule-based system was inadequate for replicating human performance, with 72.5% recall and 42.6% specificity (<xref ref-type="table" rid="table1">Table 1</xref>). ML and CNN classification, however, were close to human performance across all models (<xref ref-type="table" rid="table1">Table 1</xref>). When comparing conventional classifiers with logistic regression (which had the highest agreement), only linear SVM and NB had similar performance (<italic>P</italic>=.11 and <italic>P</italic>=.15, respectively). CNN outperformed all conventional ML classifiers (logistic regression: <italic>P</italic>=.04; SVM: <italic>P</italic>=.02; SGD: <italic>P</italic>=.002; random forest: <italic>P</italic>=.002; decision tree: <italic>P</italic>=.001; NB: <italic>P</italic>=.03).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Performance of machine learning methods to approximate manual annotation in identifying prediabetes discussions from primary care note snippets (n=930).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="260"/>
          <col width="210"/>
          <col width="150"/>
          <col width="100"/>
          <col width="150"/>
          <col width="100"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Method</td>
              <td>Instances classifier agreed with manual annotation, n (%)</td>
              <td>Sensitivity/recall</td>
              <td>Specificity</td>
              <td>PPV<sup>a</sup>/precision</td>
              <td>F measure</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="7">
                <bold>Rule-based system</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Expert-developed patterns</td>
              <td>588 (63.2)</td>
              <td>0.725</td>
              <td>0.426</td>
              <td>0.737</td>
              <td>0.731</td>
            </tr>
            <tr valign="top">
              <td colspan="7">
                <bold>Machine learning</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Logistic regression</td>
              <td>885 (95.2)</td>
              <td>0.966</td>
              <td>0.921</td>
              <td>0.965</td>
              <td>0.965</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Linear support vector machines</td>
              <td>878 (94.4)</td>
              <td>0.962</td>
              <td>0.903</td>
              <td>0.957</td>
              <td>0.960</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Stochastic gradient descent</td>
              <td>858 (92.3)</td>
              <td>0.926</td>
              <td>0.915</td>
              <td>0.96</td>
              <td>0.943</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Random forest</td>
              <td>863 (92.8)</td>
              <td>0.961</td>
              <td>0.854</td>
              <td>0.937</td>
              <td>0.948</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Decision tree</td>
              <td>832 (89.5)</td>
              <td>0.923</td>
              <td>0.83</td>
              <td>0.925</td>
              <td>0.924</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Gaussian naïve Bayes</td>
              <td>883 (95.0)</td>
              <td>0.966</td>
              <td>0.912</td>
              <td>0.96</td>
              <td>0.963</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Convolutional neural networks</td>
              <td>910 (97.9)</td>
              <td>0.984</td>
              <td>0.966</td>
              <td>0.984</td>
              <td>0.984</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>PPV: positive predictive value.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p>Manual annotation revealed a variety of linguistic patterns that did and did not represent clinical discussions of prediabetes (<xref ref-type="table" rid="table2">Table 2</xref>). Most commonly, true discussions were found in the assessment and plan, whereas keyword matches that did not represent discussions were autopopulated from structured fields. ML did result in 5% misclassification based on logistic regression, the best performing conventional classifier; a pattern was not apparent on review of these misclassified note snippets.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Example text from clinical documentation containing keywords matching the “prediabetes” extraction lexicon, stratified by whether the text represents documentation of a prediabetes discussion.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="230"/>
          <col width="0"/>
          <col width="740"/>
          <thead>
            <tr valign="bottom">
              <td colspan="3">Location in note</td>
              <td>Representative text from note snippets<sup>a</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="4">
                <bold>Text containing keyword matches representing prediabetes discussions.</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Chief complaint</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Chief complaint: Patient is a 42 y.o. female here with questions about prediabetes.</p>
                  </list-item>
                  <list-item>
                    <p>Patient presents to the visit for an annual physical and reevaluation of HTN<sup>b</sup> and impaired fasting glucose.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>History of Present Illness</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Has a treadmill but not using regularly. Recent a1c was 6.2 consistent with pre-diabetes.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Visit Problem List</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Problem List Items Addressed This Visit Asthma Borderline diabetes Essential hypertension</p>
                  </list-item>
                  <list-item>
                    <p>Assessment Order Plan 1. Hyperlipidemia ... 7. Impaired fasting glucose 8. Health care maintenance</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Assessment &#38; Plan</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Hyperglycemia Lifestyle modification including diet and exercise discussed. 6. Elevated blood pressure.</p>
                  </list-item>
                  <list-item>
                    <p>Pre-diabetes Assessment: recent A1C in good range. Plan: exercise and healthy food changes.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="4">
                <bold>Text containing keyword matches not representing prediabetes discussions.</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>One-liner</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Patient with history of HTN, HLD<sup>c</sup>, prediabetes, scleroderma here for routine health assessment.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Past Medical History</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Past Medical History: Diagnosis Date Asthma 5/14/2008 ... Prediabetes 2/6/2012 Osteoporosis 5/14/2008</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Problem List</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>... Hyperlipidemia E78.5 Impaired fasting glucose R73.01 Overweight E66.3 ...</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Diagnosis list</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Diagnoses of Essential hypertension, Osteoporosis, ..., Prediabetes, Asthma, ...</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Family history</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Family History Problem Relation Age of Onset Diabetes Father Prediabetes Paternal Grandfather...</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Pertinent positive</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Diagnosis remains unclear. He has prediabetes. Reports 2-3 months of intermittent palpitations.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Pertinent negative</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Likely has peripheral neuropathy. Negative RPR<sup>d</sup>, HIV, pre-diabetes.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Follow up reasons</td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Follow up in 1 month for flu shot and prediabetes discussion.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Results<sup>e</sup></td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>For someone without known diabetes, a hemoglobin A<sub>1c</sub> value between 5.7 % and 6.4 % is consistent with prediabetes and should be confirmed.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>General guidelines<sup>e</sup></td>
              <td colspan="2">
                <list list-type="bullet">
                  <list-item>
                    <p>Type 2 diabetes or prediabetes All men beginning at age 45 and men without symptoms at any age who are overweight or obese and have 1 or more other risk factors.</p>
                  </list-item>
                </list>
              </td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>Text was modified for length and content to serve as general examples while protecting patient anonymity.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>HTN: hypertension.</p>
          </fn>
          <fn id="table2fn3">
            <p><sup>c</sup>HLD: hyperlipidemia.</p>
          </fn>
          <fn id="table2fn4">
            <p><sup>d</sup>RPR: rapid plasma reagin.</p>
          </fn>
          <fn id="table2fn5">
            <p><sup>e</sup>Populated in notes from clinical decision support tools.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We utilized NLP and ML techniques to identify prediabetes discussions from unstructured narrative documentation with up to 98% precision and recall. To date, NLP techniques have been used in prediabetes for screening, diagnosis, risk stratification, predictive modeling, and intervention design [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref46">46</xref>-<xref ref-type="bibr" rid="ref50">50</xref>]. To our knowledge, this is the first NLP tool to identify prediabetes discussions. NLP methods have been applied in health care in many ways including in EHR free-text clinical notes to classify disease phenotype, with most studies using simple methods such as shallow classifiers, alone or combined with rule-based methods [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. Compared with these studies, our NLP methods are not novel, but our application to disease management distinguishes our study from those that primarily focus on condition identification for chronic diseases [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>In our study, a simple rule-based system was inadequate to identify prediabetes discussions due to poor specificity. In contrast, all ML methods performed well, with 89% to 98% accuracy. This result demonstrates that prediabetes discussions, despite a variety of documentation styles, can be identified using NLP pipelines. Logistic regression, an efficient conventional classifier with minimal technical dependencies, was statistically outperformed by CNN, a deep learning technique. However, both identified &#62;95% of prediabetes discussions, suggesting that either method could be applied depending on system needs.</p>
        <p>Our NLP tool has multiple applications. The simplicity of logistic regression allows for deployment in operational settings, particularly clinical decision support. The tool can also simplify the analytic process before and after a clinical intervention intended to change provider practices. For example, it can isolate discussions about prediabetes, a task that otherwise requires time-consuming manual review. The context of these discussions could then be reviewed to understand the impact of an intervention. This process would strengthen the evaluation of quality improvement programs for prediabetes to promote guideline-concordant care, which includes lifestyle counseling [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. These methods should be replicable to identify conversations about behavioral interventions for other conditions, such as obesity, polysubstance abuse, or tobacco use, that rely heavily on counseling in addition to medication management and referrals.</p>
      </sec>
      <sec>
        <title>Strengths</title>
        <p>Our study has several strengths. The keyword refinement stage was rigorous. We validated the initial keyword list against a random sample from 2 ambulatory clinics, ensuring we reviewed a variety of documentation styles. Manual annotation was performed by 2 experts to standardize our definition of “prediabetes discussion,” leading to improvement in IRR scores during training set development. We also identified false negatives and revised our initial keyword list accordingly to ensure capture of prediabetes discussions. Finally, we applied the search criteria developed during keyword refinement to a new set of notes from unique clinics to reduce overfitting. There was a total of 96 different providers included in the 930 unique note snippets, which allowed the model to learn the vocabulary and writing styles of many different clinicians.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Limitations of our study include collection of data from a single health system. However, the clinics included represent urban and suburban sites serving patients of different socioeconomic levels and disease burden, improving generalizability. Providers at other institutions may use different medical terminology, not considered in this study, to describe “prediabetes.” This could limit generalizability outside of the institution where the models were trained. However, we took several steps to reduce institutional bias, including rigorous keyword refinement and application of the final lexical search to multiple clinics that do not share standardized templates to include many linguistic styles and patterns. We limited our note selection to the first encounter following the abnormal HbA<sub>1c</sub> result; although this could miss some dialogue about prediabetes, logically these discussions are most likely to occur close to the time of the abnormal result, and this decreased bias in our models. Finally, the note selection process, requiring at least one prediabetes keyword to enter our data set, limited our ability to calculate true recall. We minimized this issue by performing manual review on a subset of the charts that did not enter our data set, to ensure we did not have selection bias in our keyword search. Future studies may consider applying our NLP pipeline against a random sample of notes without requiring keyword selection to perform additional validations. Additionally, our study provides a baseline framework for identifying discussions of prediabetes. Next steps could apply NLP pipelines to identify when discussions about prediabetes meet the threshold for delivery of guideline-concordant care.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Our NLP pipeline successfully identified prediabetes discussions in unstructured notes with precision approximating human annotation. This approach can be used to evaluate prediabetes counseling during patient visits and describe prediabetes management in primary care. Gathering these data is a critical step to inform interventions to improve the delivery of evidence-based prediabetes care to reduce the incidence of type 2 diabetes.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary methods and tables.</p>
        <media xlink:href="medinform_v10i2e29803_app1.docx" xlink:title="DOCX File, 30 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">HbA<sub>1c</sub></term>
          <def>
            <p>hemoglobin A<sub>1c</sub></p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">IRR</term>
          <def>
            <p>interrater reliability</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">JHU</term>
          <def>
            <p>Johns Hopkins University</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NB</term>
          <def>
            <p>Gaussian naïve Bayes</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NLTK</term>
          <def>
            <p>Natural Language Toolkit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">PCP</term>
          <def>
            <p>primary care provider</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">PMAP</term>
          <def>
            <p>Precision Medicine Analytics Platform</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">SGD</term>
          <def>
            <p>stochastic gradient descent</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">SVM</term>
          <def>
            <p>support vector machines</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">TF-IDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the Johns Hopkins Institute for Clinical and Translational Research Core Coins Award 2018. ET was supported by the National Institute of Diabetes and Digestive and Kidney Diseases [K23DK118205]. JLS was supported by the National Heart, Lung, and Blood Institute [5T32HL007180, PI: Hill-Briggs].</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>NMM is the co-inventor of a virtual diabetes prevention program. Under a license agreement between Johns Hopkins HealthCare Solutions and the Johns Hopkins University, NMM and the University are entitled to royalty distributions related to this technology. This arrangement has been reviewed and approved by the Johns Hopkins University in accordance with its conflict of interest policies. This technology is not described in this study. JLS is a co-investigator on a research project funded by Novo Nordisk Inc. The primary aim of the project is to create and pilot a clinical decision support tool to assist clinicians when talking to their patients about weight and obesity treatment. This project is not addressed or referenced in this publication.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>National Diabetes Statistics Report, 2020: Estimates of Diabetes and Its Burden in the United States</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2022-01-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/diabetes/pdfs/data/statistics/national-diabetes-statistics-report.pdf">https://www.cdc.gov/diabetes/pdfs/data/statistics/national-diabetes-statistics-report.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>National Diabetes Statistics Report: Estimates of Diabetes and Its Burden in the United States</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2014</year>
          <access-date>2022-01-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/diabetes/data/statistics-report/index.html">https://www.cdc.gov/diabetes/data/statistics-report/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Diabetes Prevention Program Research Group</collab>
            <name name-style="western">
              <surname>Knowler</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Fowler</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Hamman</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Christophi</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brenneman</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Brown-Friday</surname>
              <given-names>JO</given-names>
            </name>
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Venditti</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nathan</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>10-year follow-up of diabetes incidence and weight loss in the Diabetes Prevention Program Outcomes Study</article-title>
          <source>Lancet</source>
          <year>2009</year>
          <month>12</month>
          <day>14</day>
          <volume>374</volume>
          <issue>9702</issue>
          <fpage>1677</fpage>
          <lpage>86</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19878986"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(09)61457-4</pub-id>
          <pub-id pub-id-type="medline">19878986</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(09)61457-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC3135022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Knowler</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Barrett-Connor</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fowler</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Hamman</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Lachin</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Nathan</surname>
              <given-names>DM</given-names>
            </name>
            <collab>Diabetes Prevention Program Research Group</collab>
          </person-group>
          <article-title>Reduction in the incidence of type 2 diabetes with lifestyle intervention or metformin</article-title>
          <source>N Engl J Med</source>
          <year>2002</year>
          <month>03</month>
          <day>07</day>
          <volume>346</volume>
          <issue>6</issue>
          <fpage>393</fpage>
          <lpage>403</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/11832527"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa012512</pub-id>
          <pub-id pub-id-type="medline">11832527</pub-id>
          <pub-id pub-id-type="pii">346/6/393</pub-id>
          <pub-id pub-id-type="pmcid">PMC1370926</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Diabetes Prevention Program Research Group</collab>
          </person-group>
          <article-title>Long-term effects of lifestyle intervention or metformin on diabetes development and microvascular complications over 15-year follow-up: the Diabetes Prevention Program Outcomes Study</article-title>
          <source>Lancet Diabetes Endocrinol</source>
          <year>2015</year>
          <month>12</month>
          <volume>3</volume>
          <issue>11</issue>
          <fpage>866</fpage>
          <lpage>75</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26377054"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2213-8587(15)00291-0</pub-id>
          <pub-id pub-id-type="medline">26377054</pub-id>
          <pub-id pub-id-type="pii">S2213-8587(15)00291-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC4623946</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>American Diabetes Association</collab>
          </person-group>
          <article-title>3. Prevention or Delay of Type 2 Diabetes</article-title>
          <source>Diabetes Care</source>
          <year>2020</year>
          <month>01</month>
          <volume>43</volume>
          <issue>Suppl 1</issue>
          <fpage>S32</fpage>
          <lpage>S36</lpage>
          <pub-id pub-id-type="doi">10.2337/dc20-S003</pub-id>
          <pub-id pub-id-type="medline">31862746</pub-id>
          <pub-id pub-id-type="pii">43/Supplement_1/S32</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <article-title>National Diabetes Prevention Program</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2017</year>
          <access-date>2022-01-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cdc.gov/diabetes/prevention/index.htm">http://www.cdc.gov/diabetes/prevention/index.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Greer</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>O'Rourke</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yeh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>McGuire</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Maruthur</surname>
              <given-names>NM</given-names>
            </name>
          </person-group>
          <article-title>Survey of primary care providers' knowledge of screening for, diagnosing and managing prediabetes</article-title>
          <source>J Gen Intern Med</source>
          <year>2017</year>
          <month>12</month>
          <volume>32</volume>
          <issue>11</issue>
          <fpage>1172</fpage>
          <lpage>1178</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28730532"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-017-4103-1</pub-id>
          <pub-id pub-id-type="medline">28730532</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-017-4103-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5653548</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Greer</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>O'Rourke</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yeh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>McGuire</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Albright</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Marsteller</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Maruthur</surname>
              <given-names>NM</given-names>
            </name>
          </person-group>
          <article-title>National survey of primary care physicians' knowledge, practices, and perceptions of prediabetes</article-title>
          <source>J Gen Intern Med</source>
          <year>2019</year>
          <month>11</month>
          <volume>34</volume>
          <issue>11</issue>
          <fpage>2475</fpage>
          <lpage>2481</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31502095"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-019-05245-7</pub-id>
          <pub-id pub-id-type="medline">31502095</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-019-05245-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6848700</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rhee</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Herrick</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ziemer</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Vaccarino</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Weintraub</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Narayan</surname>
              <given-names>KMV</given-names>
            </name>
            <name name-style="western">
              <surname>Kolm</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Twombly</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>LS</given-names>
            </name>
          </person-group>
          <article-title>Many Americans have pre-diabetes and should be considered for metformin therapy</article-title>
          <source>Diabetes Care</source>
          <year>2010</year>
          <month>01</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>49</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19808929"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc09-0341</pub-id>
          <pub-id pub-id-type="medline">19808929</pub-id>
          <pub-id pub-id-type="pii">dc09-0341</pub-id>
          <pub-id pub-id-type="pmcid">PMC2797985</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karve</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hayward</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>Prevalence, diagnosis, and treatment of impaired fasting glucose and impaired glucose tolerance in nondiabetic U.S. adults</article-title>
          <source>Diabetes Care</source>
          <year>2010</year>
          <month>11</month>
          <volume>33</volume>
          <issue>11</issue>
          <fpage>2355</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20724649"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc09-1957</pub-id>
          <pub-id pub-id-type="medline">20724649</pub-id>
          <pub-id pub-id-type="pii">dc09-1957</pub-id>
          <pub-id pub-id-type="pmcid">PMC2963494</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Duru</surname>
              <given-names>OK</given-names>
            </name>
            <name name-style="western">
              <surname>Ettner</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Turk</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Keckhafer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mangione</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Metformin prescription for insured adults with prediabetes from 2010 to 2012: a retrospective cohort study</article-title>
          <source>Ann Intern Med</source>
          <year>2015</year>
          <month>05</month>
          <day>21</day>
          <volume>162</volume>
          <issue>8</issue>
          <fpage>542</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25894024"/>
          </comment>
          <pub-id pub-id-type="doi">10.7326/M14-1773</pub-id>
          <pub-id pub-id-type="medline">25894024</pub-id>
          <pub-id pub-id-type="pii">2275385</pub-id>
          <pub-id pub-id-type="pmcid">PMC4682357</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Centers for Disease Control and Prevention (CDC)</collab>
          </person-group>
          <article-title>Awareness of prediabetes—United States, 2005–2010</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2013</year>
          <month>03</month>
          <day>22</day>
          <volume>62</volume>
          <issue>11</issue>
          <fpage>209</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/mmwr/preview/mmwrhtml/mm6211a4.htm"/>
          </comment>
          <pub-id pub-id-type="medline">23515058</pub-id>
          <pub-id pub-id-type="pii">mm6211a4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4604913</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmittdiel</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Segal</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Roumie</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Ohnsorg</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Novel use and utility of integrated electronic health records to assess rates of prediabetes recognition and treatment: brief report from an integrated electronic health records pilot study</article-title>
          <source>Diabetes Care</source>
          <year>2014</year>
          <month>02</month>
          <volume>37</volume>
          <issue>2</issue>
          <fpage>565</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24271190"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc13-1223</pub-id>
          <pub-id pub-id-type="medline">24271190</pub-id>
          <pub-id pub-id-type="pii">dc13-1223</pub-id>
          <pub-id pub-id-type="pmcid">PMC3898765</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhalishahi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rinaldi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>05</month>
          <day>27</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12239</pub-id>
          <pub-id pub-id-type="medline">31066697</pub-id>
          <pub-id pub-id-type="pii">v7i2e12239</pub-id>
          <pub-id pub-id-type="pmcid">PMC6528438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Silva</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jönsson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Demmer</surname>
              <given-names>RT</given-names>
            </name>
          </person-group>
          <article-title>A combined strategy of feature selection and machine learning to identify predictors of prediabetes</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>03</month>
          <day>01</day>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>396</fpage>
          <lpage>406</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31889178"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz204</pub-id>
          <pub-id pub-id-type="medline">31889178</pub-id>
          <pub-id pub-id-type="pii">5691201</pub-id>
          <pub-id pub-id-type="pmcid">PMC7647289</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>DW</given-names>
            </name>
          </person-group>
          <article-title>Screening for pre-diabetes using support vector machine model</article-title>
          <source>Annu Int Conf IEEE Eng Med Biol Soc</source>
          <year>2014</year>
          <volume>2014</volume>
          <fpage>2472</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1109/EMBC.2014.6944123</pub-id>
          <pub-id pub-id-type="medline">25570491</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maeta</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nishiyama</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fujibayashi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gunji</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sasabe</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Iijima</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Naito</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Prediction of glucose metabolism disorder risk using a machine learning algorithm: pilot study</article-title>
          <source>JMIR Diabetes</source>
          <year>2018</year>
          <month>12</month>
          <day>26</day>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>e10212</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://diabetes.jmir.org/2018/4/e10212/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10212</pub-id>
          <pub-id pub-id-type="medline">30478026</pub-id>
          <pub-id pub-id-type="pii">v3i4e10212</pub-id>
          <pub-id pub-id-type="pmcid">PMC6288596</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Parikh</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Shenfeld</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Ivanov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Church</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Laramie</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Mardekian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Piper</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Willke</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rublee</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Reverse engineering and evaluation of prediction models for progression to type 2 diabetes: an application of machine learning using electronic health records</article-title>
          <source>J Diabetes Sci Technol</source>
          <year>2015</year>
          <month>12</month>
          <day>20</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1932296815620200?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1932296815620200</pub-id>
          <pub-id pub-id-type="medline">26685993</pub-id>
          <pub-id pub-id-type="pii">1932296815620200</pub-id>
          <pub-id pub-id-type="pmcid">PMC4738229</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>TK</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>DW</given-names>
            </name>
          </person-group>
          <article-title>Screening for prediabetes using machine learning models</article-title>
          <source>Comput Math Methods Med</source>
          <year>2014</year>
          <volume>2014</volume>
          <fpage>618976</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2014/618976"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2014/618976</pub-id>
          <pub-id pub-id-type="medline">25165484</pub-id>
          <pub-id pub-id-type="pmcid">PMC4140121</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Acciaroli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sparacino</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hakaste</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Facchinetti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Di Nunzio</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Palombit</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tuomi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Aranda</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vega</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cobelli</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Diabetes and prediabetes classification using glycemic variability indices from continuous glucose monitoring data</article-title>
          <source>J Diabetes Sci Technol</source>
          <year>2018</year>
          <month>01</month>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>105</fpage>
          <lpage>113</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28569077"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1932296817710478</pub-id>
          <pub-id pub-id-type="medline">28569077</pub-id>
          <pub-id pub-id-type="pmcid">PMC5761967</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shankaracharya</surname>
            </name>
            <name name-style="western">
              <surname>Odedra</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Samanta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vidyarthi</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Computational intelligence-based diagnosis tool for the detection of prediabetes and type 2 diabetes in India</article-title>
          <source>Rev Diabet Stud</source>
          <year>2012</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>55</fpage>
          <lpage>62</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.soc-bdr.org/content/e4/articlelookup?showfulltext=1&amp;volume=9&amp;firstpage=55"/>
          </comment>
          <pub-id pub-id-type="doi">10.1900/RDS.2012.9.55</pub-id>
          <pub-id pub-id-type="medline">22972445</pub-id>
          <pub-id pub-id-type="pmcid">PMC3448174</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Che</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>IGRNet: a deep learning model for non-invasive, real-time diagnosis of prediabetes through electrocardiograms</article-title>
          <source>Sensors (Basel)</source>
          <year>2020</year>
          <month>05</month>
          <day>30</day>
          <volume>20</volume>
          <issue>9</issue>
          <fpage>2556</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=s20092556"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/s20092556</pub-id>
          <pub-id pub-id-type="medline">32365875</pub-id>
          <pub-id pub-id-type="pii">s20092556</pub-id>
          <pub-id pub-id-type="pmcid">PMC7248708</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dinh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Miertschin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mohanty</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>A data-driven approach to predicting diabetes and cardiovascular disease with machine learning</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>11</month>
          <day>06</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>211</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0918-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0918-5</pub-id>
          <pub-id pub-id-type="medline">31694707</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0918-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6836338</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cahn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shoshan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sagiv</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yesharim</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Goshen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Shalev</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Raz</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Prediction of progression from pre-diabetes to diabetes: Development and validation of a machine learning model</article-title>
          <source>Diabetes Metab Res Rev</source>
          <year>2020</year>
          <month>02</month>
          <volume>36</volume>
          <issue>2</issue>
          <fpage>e3252</fpage>
          <pub-id pub-id-type="doi">10.1002/dmrr.3252</pub-id>
          <pub-id pub-id-type="medline">31943669</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garcia-Carretero</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vigil-Medina</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mora-Jimenez</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Soguero-Ruiz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Barquero-Perez</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Ramos-Lopez</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Use of a K-nearest neighbors model to predict the development of type 2 diabetes within 2 years in an obese, hypertensive population</article-title>
          <source>Med Biol Eng Comput</source>
          <year>2020</year>
          <month>05</month>
          <volume>58</volume>
          <issue>5</issue>
          <fpage>991</fpage>
          <lpage>1002</lpage>
          <pub-id pub-id-type="doi">10.1007/s11517-020-02132-w</pub-id>
          <pub-id pub-id-type="medline">32100174</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11517-020-02132-w</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>McElhinney</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Culver</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Alfreds</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Stearns</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sylvester</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Widen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>XB</given-names>
            </name>
          </person-group>
          <article-title>Defining and characterizing the critical transition state prior to the type 2 diabetes disease</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <volume>12</volume>
          <issue>7</issue>
          <fpage>e0180937</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0180937"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0180937</pub-id>
          <pub-id pub-id-type="medline">28686739</pub-id>
          <pub-id pub-id-type="pii">PONE-D-17-10439</pub-id>
          <pub-id pub-id-type="pmcid">PMC5501620</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pomares-Quimbaya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kreuzthaler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Current approaches to identify sections within clinical narratives from electronic health records: a systematic review</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2019</year>
          <month>07</month>
          <day>18</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>155</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-019-0792-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-019-0792-y</pub-id>
          <pub-id pub-id-type="medline">31319802</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-019-0792-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC6637496</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Honnibal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montani</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>spaCy 2: Natural language understanding with Bloom embeddings, convolutional neural networks and incremental parsing</article-title>
          <source>spacy.io</source>
          <year>2017</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://spacy.io/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loper</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bird</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>NLTK: the natural language toolkit</article-title>
          <year>2002</year>
          <conf-name>ACL-02 Workshop on Effective tools and methodologies for teaching natural language processing and computational linguistics</conf-name>
          <conf-date>July 7, 2002</conf-date>
          <conf-loc>Philadelphia, PA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3115/1118108.1118117"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/1118108.1118117</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2786984.2786995</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rouhizadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jaidka</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Buffone</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
          </person-group>
          <article-title>Identifying locus of control in social media language</article-title>
          <year>2018</year>
          <conf-name>Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>October 31 - November 4, 2018</conf-date>
          <conf-loc>Brussels, Belgium</conf-loc>
          <fpage>1146</fpage>
          <lpage>1152</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/D18-1145.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d18-1145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>MY</given-names>
            </name>
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>L1 regularization path algorithm for generalized linear models</article-title>
          <source>Journal of the Royal Statistical Society. Series B (Statistical Methodology)</source>
          <year>2007</year>
          <volume>69</volume>
          <issue>4</issue>
          <fpage>659</fpage>
          <lpage>677</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jstor.org/stable/4623289"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1467-9868.2007.00607.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hosmer</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Lemeshow</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sturdivant</surname>
              <given-names>RX</given-names>
            </name>
          </person-group>
          <source>Applied Logistic Regression, 3rd Edition</source>
          <year>2013</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &#38; Sons, Ltd</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Machine Learning</source>
          <year>1995</year>
          <volume>20</volume>
          <fpage>273</fpage>
          <lpage>297</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://link.springer.com/content/pdf/10.1007/BF00994018.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/bf00994018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bottou</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Lechevallier</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Saporta</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Large-Scale Machine Learning with Stochastic Gradient Descent</article-title>
          <source>Proceedings of COMPSTAT'2010</source>
          <year>2010</year>
          <publisher-loc>Heidelberg, Germany</publisher-loc>
          <publisher-name>Physica-Verlag HD</publisher-name>
          <fpage>177</fpage>
          <lpage>186</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Safavian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Landgrebe</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A survey of decision tree classifier methodology</article-title>
          <source>IEEE Trans Syst Man Cybern</source>
          <year>1991</year>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>660</fpage>
          <lpage>674</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/97458"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/21.97458</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liaw</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wiener</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Classification and Regression by randomForest</article-title>
          <source>R News</source>
          <year>2002</year>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>18</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cogns.northwestern.edu/cbmg/LiawAndWiener2002.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Machine Learning</source>
          <year>2001</year>
          <volume>45</volume>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rish</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>An Empirical Study of the Naive Bayes Classifier</article-title>
          <year>2001</year>
          <conf-name>IJCAI 2001 workshop on empirical methods in artificial intelligence</conf-name>
          <conf-date>August 4-6, 2001</conf-date>
          <conf-loc>Seattle, WA</conf-loc>
          <fpage>41</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cc.gatech.edu/fac/Charles.Isbell/classes/reading/papers/Rish.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised convolutional neural networks for text categorization via region embedding</article-title>
          <source>Adv Neural Inf Process Syst</source>
          <year>2015</year>
          <month>12</month>
          <volume>28</volume>
          <fpage>919</fpage>
          <lpage>927</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27087766"/>
          </comment>
          <pub-id pub-id-type="medline">27087766</pub-id>
          <pub-id pub-id-type="pmcid">PMC4831869</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A sensitivity analysis of (and practitioners' guide to) convolutional neural networks for sentence classification</article-title>
          <source>Cornell University</source>
          <year>2015</year>
          <access-date>2022-01-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1510.03820">https://arxiv.org/abs/1510.03820</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Neumann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Beltagy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ammar</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>ScispaCy: Fast and Robust Models for Biomedical Natural Language Processing</article-title>
          <source>Cornell University</source>
          <year>2019</year>
          <access-date>2022-01-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1902.07669">https://arxiv.org/abs/1902.07669</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Honnibal</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Embed, encode, attend, predict: The new deep learning formula for state-of-the-art NLP models</article-title>
          <source>Explosion AI</source>
          <year>2016</year>
          <month>11</month>
          <day>9</day>
          <access-date>2022-01-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://explosion.ai/blog/deep-learning-formula-nlp">https://explosion.ai/blog/deep-learning-formula-nlp</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raschka</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>MLxtend: Providing machine learning and data science utilities and extensions to Python’s scientific computing stack</article-title>
          <source>JOSS</source>
          <year>2018</year>
          <month>04</month>
          <volume>3</volume>
          <issue>24</issue>
          <fpage>638</fpage>
          <pub-id pub-id-type="doi">10.21105/joss.00638</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Reaven</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Saremi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Abbasi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Migrino</surname>
              <given-names>RQ</given-names>
            </name>
            <collab>ACT NOW Study Investigators</collab>
          </person-group>
          <article-title>Machine learning to predict rapid progression of carotid atherosclerosis in patients with impaired glucose tolerance</article-title>
          <source>EURASIP J Bioinform Syst Biol</source>
          <year>2016</year>
          <month>12</month>
          <volume>2016</volume>
          <issue>1</issue>
          <fpage>14</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.1186/s13637-016-0049-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13637-016-0049-6</pub-id>
          <pub-id pub-id-type="medline">27642290</pub-id>
          <pub-id pub-id-type="pii">49</pub-id>
          <pub-id pub-id-type="pmcid">PMC5011483</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garcia-Carretero</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vigil-Medina</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Barquero-Perez</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Ramos-Lopez</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Pulse wave velocity and machine learning to predict cardiovascular outcomes in prediabetic and diabetic populations</article-title>
          <source>J Med Syst</source>
          <year>2019</year>
          <month>12</month>
          <day>09</day>
          <volume>44</volume>
          <issue>1</issue>
          <fpage>16</fpage>
          <pub-id pub-id-type="doi">10.1007/s10916-019-1479-y</pub-id>
          <pub-id pub-id-type="medline">31820120</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10916-019-1479-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeevi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Korem</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zmora</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Israeli</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rothschild</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ben-Yacov</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lador</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Avnit-Sagi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lotan-Pompan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Suez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mahdi</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Matot</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Malka</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kosower</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rein</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zilberman-Schapira</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dohnalová</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Pevsner-Fischer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bikovsky</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Halpern</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Elinav</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Segal</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Personalized nutrition by prediction of glycemic responses</article-title>
          <source>Cell</source>
          <year>2015</year>
          <month>11</month>
          <day>19</day>
          <volume>163</volume>
          <issue>5</issue>
          <fpage>1079</fpage>
          <lpage>1094</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0092-8674(15)01481-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cell.2015.11.001</pub-id>
          <pub-id pub-id-type="medline">26590418</pub-id>
          <pub-id pub-id-type="pii">S0092-8674(15)01481-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Popp</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>St-Jules</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ganguzza</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Illiano</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Curran</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schoenthaler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bergman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Segal</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Godneva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sevick</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>The rationale and design of the personal diet study, a randomized clinical trial evaluating a personalized approach to weight loss in individuals with pre-diabetes and early-stage type 2 diabetes</article-title>
          <source>Contemp Clin Trials</source>
          <year>2019</year>
          <month>04</month>
          <volume>79</volume>
          <fpage>80</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cct.2019.03.001</pub-id>
          <pub-id pub-id-type="medline">30844471</pub-id>
          <pub-id pub-id-type="pii">S1551-7144(18)30584-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>CKY</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>KSL</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tse</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Panagiotou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Gut Microbiome Fermentation Determines the Efficacy of Exercise for Diabetes Prevention</article-title>
          <source>Cell Metab</source>
          <year>2020</year>
          <month>01</month>
          <day>07</day>
          <volume>31</volume>
          <issue>1</issue>
          <fpage>77</fpage>
          <lpage>91.e5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1550-4131(19)30608-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmet.2019.11.001</pub-id>
          <pub-id pub-id-type="medline">31786155</pub-id>
          <pub-id pub-id-type="pii">S1550-4131(19)30608-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical concept extraction: A methodology review</article-title>
          <source>J Biomed Inform</source>
          <year>2020</year>
          <month>09</month>
          <volume>109</volume>
          <fpage>103526</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(20)30154-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2020.103526</pub-id>
          <pub-id pub-id-type="medline">32768446</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(20)30154-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC7746475</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
