<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i2e16878</article-id>
      <article-id pub-id-type="pmid">32130159</article-id>
      <article-id pub-id-type="doi">10.2196/16878</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Identifying Acute Low Back Pain Episodes in Primary Care Practice From Clinical Notes: Observational Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Osmani</surname>
            <given-names>Venet</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Boukhechba</surname>
            <given-names>Mehdi</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Miotto</surname>
            <given-names>Riccardo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7815-6000</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Percha</surname>
            <given-names>Bethany L</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0988-4183</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Glicksberg</surname>
            <given-names>Benjamin S</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4515-8090</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Hao-Chih</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1538-6175</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Cruz</surname>
            <given-names>Lisanne</given-names>
          </name>
          <degrees>MD, MSc, FAAPMR</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2894-0240</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Dudley</surname>
            <given-names>Joel T</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7036-6492</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Nabeel</surname>
            <given-names>Ismail</given-names>
          </name>
          <degrees>MD, MPH</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <address>
            <institution>Department of Environmental Medicine and Public Health</institution>
            <institution>Icahn School of Medicine at Mount Sinai</institution>
            <addr-line>17 East 102nd Street, Box 1043</addr-line>
            <addr-line>New York, NY, 10029</addr-line>
            <country>United States</country>
            <phone>1 (614) 423 9057</phone>
            <email>ismail.nabeel@icahn.mssm.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6909-1970</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Hasso Plattner Institute for Digital Health at Mount Sinai</institution>
        <institution>Icahn School of Medicine at Mount Sinai</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Institute for Next Generation Healthcare</institution>
        <institution>Icahn School of Medicine at Mount Sinai</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Genetics and Genomic Sciences</institution>
        <institution>Icahn School of Medicine at Mount Sinai</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Physical Medicine and Rehabilitation</institution>
        <institution>Icahn School of Medicine at Mount Sinai</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Environmental Medicine and Public Health</institution>
        <institution>Icahn School of Medicine at Mount Sinai</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Ismail Nabeel <email>ismail.nabeel@icahn.mssm.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>2</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>27</day>
        <month>2</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>2</issue>
      <elocation-id>e16878</elocation-id>
      <history>
        <date date-type="received">
          <day>1</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>12</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Riccardo Miotto, Bethany L Percha, Benjamin S Glicksberg, Hao-Chih Lee, Lisanne Cruz, Joel T Dudley, Ismail Nabeel. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 27.02.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/2/e16878/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Acute and chronic low back pain (LBP) are different conditions with different treatments. However, they are coded in electronic health records with the same International Classification of Diseases, 10th revision (ICD-10) code (M54.5) and can be differentiated only by retrospective chart reviews. This prevents an efficient definition of data-driven guidelines for billing and therapy recommendations, such as return-to-work options.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The objective of this study was to evaluate the feasibility of automatically distinguishing acute LBP episodes by analyzing free-text clinical notes.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used a dataset of 17,409 clinical notes from different primary care practices; of these, 891 documents were manually annotated as <italic>acute LBP</italic> and 2973 were generally associated with LBP via the recorded ICD-10 code. We compared different supervised and unsupervised strategies for automated identification: keyword search, topic modeling, logistic regression with bag of n-grams and manual features, and deep learning (a convolutional neural network-based architecture [ConvNet]). We trained the supervised models using either manual annotations or ICD-10 codes as positive labels.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>ConvNet trained using manual annotations obtained the best results with an area under the receiver operating characteristic curve of 0.98 and an F score of 0.70. ConvNet’s results were also robust to reduction of the number of manually annotated documents. In the absence of manual annotations, topic models performed better than methods trained using ICD-10 codes, which were unsatisfactory for identifying LBP acuity.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study uses clinical notes to delineate a potential path toward systematic learning of therapeutic strategies, billing guidelines, and management options for acute LBP at the point of care.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>electronic health records</kwd>
        <kwd>clinical notes</kwd>
        <kwd>low back pain</kwd>
        <kwd>natural language processing</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Low back pain (LBP) is one of the most common causes of disability in US adults younger than 45 years [<xref ref-type="bibr" rid="ref1">1</xref>], with 10 to 20% of American workers reporting persistent back pain [<xref ref-type="bibr" rid="ref2">2</xref>]. LBP impacts one’s ability to work and affects the quality of life. For example, in 2015, Luckhaupt et al showed that, from a pool of 19,441 people, 16.9% of workers with any LBP and 19.0% of those with frequent and severe LBP missed at least one full day of work over a period of 3 months [<xref ref-type="bibr" rid="ref3">3</xref>]. LBP events also lead to a significant financial burden for both individuals and clinical facilities, with combined direct and indirect costs of treatment for musculoskeletal injuries and associated pain estimated to be approximately US $213 billion annually [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      <p>LBP events fall into 2 major categories: acute and chronic [<xref ref-type="bibr" rid="ref5">5</xref>]. Acute LBP occurs suddenly, usually associated with trauma or injury with subsequent pain, whereas chronic LBP is often reported by patients in regular checkups and has led to a significant increase in the use of health care services over the past two decades. It is very important to differentiate between acute and chronic LBP in the clinical setting as these conditions—as well as their management and billing—are substantively different. Chronic back pain is generally treated with spinal injections [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>], surgery [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], and/or pain medications [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], whereas anti-inflammatories and a rapid return to normal activities of daily living are generally the best recommendations for acute LBP [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
      <p>However, acute and chronic LBP are usually not explicitly separated in electronic health records (EHRs) because of a lack of distinguishing codes. The International Classification of Diseases, 10th revision (ICD-10) standard only includes the code M54.5 to characterize <italic>low back pain</italic> diagnosis, and it does not provide modifiers to distinguish different LBP acuities [<xref ref-type="bibr" rid="ref13">13</xref>]. Acuity is usually reported in clinical notes, requiring a retrospective chart review of the free text to characterize LBP events, which is time consuming and not scalable [<xref ref-type="bibr" rid="ref14">14</xref>]. Moreover, acuity can be expressed in different ways. For example, the text could mention <italic>acute low back pain</italic> or <italic>acute LBP</italic>, but could also simply report <italic>shooting pain down into the lower extremities</italic>, <italic>limited spine range of motion, vertebral tenderness</italic>, <italic>diffuse pain in lumbar muscles</italic>, and so on [<xref ref-type="bibr" rid="ref15">15</xref>]. This variability makes it difficult for clinical facilities and researchers to group LBP episodes by acuity to perform key tasks, such as defining appropriate diagnostic and billing codes; evaluating the effectiveness of prescribed treatments; and deriving data-driven therapeutic guidelines and improved diagnostic methods that could reduce time, disability, and cost.</p>
      <p>This paper is the first to explore the use of automated approaches based on machine learning and information retrieval to analyze free-text clinical notes and identify the acuity of LBP episodes. Specifically, we use a set of manually annotated notes to train and evaluate various machine learning architectures based on logistic regression (LR), n-grams, topic models, word embeddings, and convolutional neural networks, and to demonstrate that some of these models are able to identify acute LBP episodes with promising precision. In addition, we demonstrate the ineffectiveness of using ICD-10 codes alone to train the models, reinforcing the idea that they are not sufficient to differentiate the acuity of LBP. Our overall objective was to build an automated framework that can help front line primary care providers (PCPs) in the development of targeted strategies and return-to-work (RTW) options for acute LBP episodes in clinical practice.</p>
      <sec>
        <title>Background and Significance</title>
        <p>PCPs are commonly the first medical practitioners to assess patients’ musculoskeletal injuries and pain associated with these injuries and are, therefore, in a unique position to offer reassurance, treatment options, and RTW recommendations catered to the acuity of the injury and pain associated with it. Several studies have documented increases in medication prescriptions and visits to physicians, physical therapists, and chiropractors for LBP episodes [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. As individuals with chronic LBP seek care and use health care services more frequently than those with acute LBP, increases in health care use and costs for back pain are driven more by chronic than acute cases [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        <p>A rapid return to normal activities of daily living, including work, is generally the best activity recommendation for acute LBP management [<xref ref-type="bibr" rid="ref12">12</xref>]. The number of workdays that are lost because of acute LBP can be reduced by implementing clinical practice guidelines in the primary care setting [<xref ref-type="bibr" rid="ref20">20</xref>]. In previous work, Cruz et al built an RTW protocol tool for PCPs based on guidelines from the LBP literature [<xref ref-type="bibr" rid="ref21">21</xref>]. On the basis of the type of work (eg, clerical, manual, or heavy) and the severity of the condition, the doctor would recommend RTW options (in partial or full duty capacity) within a certain number of days. The study found that physicians were likely to use this protocol, especially when it was integrated into the EHRs. However, the protocol was not always used for patients suffering from acute LBP as the research team was unable to quickly identify the acuity using only the structured EHR data (eg, ICD-10 codes). Acuity information was only available in the progress notes and was thus not incorporated into the automated recommendations. This prevented the research team from providing accurate feedback to PCPs based on a full picture of the patient’s condition. A similar tool that could incorporate acuity information from notes could provide much more specific recommendations to PCPs that incorporate best practice guidelines for each acuity level. Besides leading to more precise care, this would streamline billing for LBP [<xref ref-type="bibr" rid="ref22">22</xref>]. Similar needs arise for other musculoskeletal conditions, such as knee, elbow, and shoulder pain, where ICD-10 codes do not differentiate by pain level and acuity [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        <p>Machine learning methods for EHR data processing are enabling improved understanding of patient clinical trajectories, creating opportunities to derive new clinical insights [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. In recent years, the application of deep learning, a hierarchical computational design based on layers of neural networks [<xref ref-type="bibr" rid="ref27">27</xref>], to structured EHRs has led to promising results on clinical tasks such as disease phenotyping and prediction [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]. However, a wealth of relevant clinical information remains locked behind clinical narratives in the free text of notes. Natural language processing (NLP)—a branch of computer science that enables machines to process human language [<xref ref-type="bibr" rid="ref34">34</xref>] for applications such as machine translation [<xref ref-type="bibr" rid="ref35">35</xref>], text generation [<xref ref-type="bibr" rid="ref36">36</xref>], and image captioning [<xref ref-type="bibr" rid="ref37">37</xref>]—has been used to parse clinical notes to extract relevant insights that can guide clinical decisions [<xref ref-type="bibr" rid="ref38">38</xref>]. Recent applications of deep learning to clinical NLP have classified clinical notes according to diagnosis or disease codes [<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>], predicted disease onset [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], and extracted primary cancer sites and their laterality in pathology reports [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. 
However, although deep learning has successfully been applied to analyze clinical notes, traditional methods are still preferable when training data are limited [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>].</p>
        <p>Regardless of the specific methodology, tools based on NLP applied to clinical narratives have not been widely used in clinical settings [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], despite the fact that physicians are likely to follow computer-assisted guidelines if recommendations are tied to their own observations [<xref ref-type="bibr" rid="ref47">47</xref>]. In this paper, we present an NLP-based framework that can help physicians adhere to best practices and RTW recommendations for LBP. To the best of our knowledge, there are no studies to date that have applied machine learning to clinical notes to distinguish the acuity of a musculoskeletal condition in cases where it is not explicitly coded.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>The conceptual steps of this study are summarized in <xref rid="figure1" ref-type="fig">Figure 1</xref>, specifically dataset composition, text processing, clinical notes modeling, and experimental evaluation. The overall goal was to evaluate the feasibility of automatically identifying clinical notes reporting <italic>acute LBP</italic> episodes.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Conceptual framework used to evaluate the use of automated approaches based on machine learning and information retrieval to analyze free-text clinical notes and identify acute low back pain episodes (a). The various unsupervised and supervised machine learning approaches used for clinical note modeling (b). ConvNet: convolutional neural network-based architecture; ICD-10: international classification of diseases, 10th revision.</p>
          </caption>
          <graphic xlink:href="medinform_v8i2e16878_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Dataset</title>
        <p>We used a set of free-text clinical notes extracted from the Mount Sinai data warehouse, made available for use under institutional review board approval following Health Insurance Portability and Accountability Act guidelines. The Mount Sinai Health System is an urban tertiary care hospital located in the Upper East Side of Manhattan in New York City. It generates a high volume of structured, semistructured, and unstructured data as part of its routine health care and clinical operations, which include inpatient, outpatient, and emergency room visits. These clinical notes were collected during a previous pilot study evaluating an RTW tool based on EHR data that included nearly 40,000 encounters for 15,715 patients spanning from 2016 to 2018 and clinical notes written by 81 different providers [<xref ref-type="bibr" rid="ref21">21</xref>]. In that study, we used the published literature to develop a list of guidelines to determine the assessment and management of acute LBP episodes in clinical practice. In particular, we used ICD-10 codes and other parameters, such as <italic>presenting complaint</italic>, <italic>pre-existing conditions</italic>, <italic>management factors</italic>, and <italic>imaging/radiology/test ordered</italic>, to define and label the acuity of LBP in a clinical encounter. Following these guidelines, 14 individuals (physical medicine and rehabilitation fellows, residents, and medical students) manually reviewed a random set of 4291 clinical notes associated with these encounters and labeled all <italic>acute low back pain</italic> events. Each note was reviewed by at least two individuals and was further checked by a lead physician researcher if it was marked as ambiguous and/or there was discordance between reviewers.</p>
        <p>This project leveraged the entire set of clinical notes that were collected in the previous study. In particular, we joined all the progress notes of these encounters under the same initial visit, and we eliminated duplicate, short (less than 3 words), and nonmeaningful reports. The final dataset was composed of 17,409 distinct clinical notes, with length ranging from 7 to 6638 words. Of this set, 3092 notes were manually reviewed in the previous study, and 891 of them were annotated as <italic>acute LBP</italic>. The remaining 14,317 notes were not manually evaluated and were related to different clinical domains, including various musculoskeletal disorders and potentially LBP events. In this final dataset, 2973 notes were also associated with an encounter billed with an ICD-10 M54.5 <italic>Low back pain</italic> code.</p>
      </sec>
      <sec>
        <title>Text Processing</title>
        <p>Every note in the dataset was tokenized, divided into sentences, and checked to remove punctuation; numbers; and nonrelevant concepts such as URLs, emails, and dates. Each note was then represented as a list of sentences, with every sentence being a list of lemmatized words represented as one-hot encodings. The vocabulary was composed of all the words appearing at least five times in the training set. The discarded words were corrected to the terms in the vocabulary having the minimum edit distance, that is, the minimum number of operations required to transform one string into the other [<xref ref-type="bibr" rid="ref48">48</xref>]. This step reduced the number of misspelled words and prevented the accidental discarding of relevant information; at the same time, it also limited the size of the vocabulary to improve scalability [<xref ref-type="bibr" rid="ref39">39</xref>]. Overall, the vocabulary covering the whole dataset comprised 56,142 unique words.</p>
      </sec>
      <sec>
        <title>Clinical Note Modeling</title>
        <p>We evaluated different approaches for identifying clinical notes that refer to acute LBP episodes. These included both supervised and unsupervised methods. Although we benefited from the use of high-quality manual annotations to train the supervised models, we also investigated alternatives that did not require manual annotation of notes. All these methods provided straightforward explanations of their predictions, enabling us to validate each model and to identify parts of text and patterns that are relevant to the <italic>acute LBP</italic> predictions.</p>
        <sec>
          <title>Keyword Search</title>
          <p>We searched for a set of relevant keywords in the text. In particular, we looked for “acute low back pain,” “acute lbp,” “acute low bp,” and “acute back pain,” and we counted their occurrences in the text. We used the NegEx algorithm [<xref ref-type="bibr" rid="ref49">49</xref>] to annotate and remove negated occurrences of the keywords. In the evaluation, we refer to this model as <italic>WordSearch</italic>.</p>
        </sec>
        <sec>
          <title>Topic Modeling</title>
          <p>We used topic modeling on the full set of words contained in the notes to capture abstract topics referred to in the dataset [<xref ref-type="bibr" rid="ref50">50</xref>]. Topic modeling is an unsupervised inference process, in this case, implemented using latent Dirichlet allocation [<xref ref-type="bibr" rid="ref51">51</xref>], which captures patterns of word co-occurrences within documents to define interpretable topics (ie, multinomial distribution of words) and represent a document as a multinomial over these topics. Every document can then be classified as talking about 1 or (usually) more topics. Topic modeling is often used in health care to generalize clinical notes, improve the automatic processing of patient data, and explore clinical datasets [<xref ref-type="bibr" rid="ref52">52</xref>-<xref ref-type="bibr" rid="ref55">55</xref>].</p>
          <p>In this study, we assumed that 1 or more of these topics might refer to acute LBP. To discover them, we identified the most likely topics for a set of keywords (ie, “acute,” “low,” “back,” “pain,” “lbp,” and “bp”), and we manually reviewed them to retain only those that seemed more likely to characterize acute LBP episodes (ie, that included most of the keywords with high probability). We then considered the maximum likelihood among these topics as the probability that a report referred to acute LBP (ie, <italic>TopicModel</italic> in the experiments).</p>
        </sec>
        <sec>
          <title>Bag of N-Grams</title>
          <p>Each clinical note was represented as a bag of n-grams (BoN; with n=1, ..., 5), with term frequency-inverse document frequency (TF-IDF) weights (determined from the corpus of documents). Each n-gram is a contiguous sequence of <italic>n</italic> words from the text. We considered all the words in the vocabulary and filtered the common stop words based on the English dictionary before building all the n-grams. The classification was implemented using LR with least absolute shrinkage and selection operator (LASSO; ie, <italic>BoN-LR</italic>).</p>
        </sec>
        <sec>
          <title>Feature Engineering</title>
          <p>We used the protocol built by Cruz et al [<xref ref-type="bibr" rid="ref21">21</xref>] to define acute LBP episodes in the clinical notes. In particular, we used all the concepts described in that guideline, preprocessed them with the same algorithm used for the clinical notes, and built a set of 5154 distinct n-grams (with n=1, ..., 5), which we refer to as <italic>FeatEng</italic>. We then represented each clinical note as a bag of FeatEng (ie, we counted the occurrences of only these n-grams in the text), normalized with TF-IDF weights, and classified them using LR with LASSO (ie, <italic>FeatEng-LR</italic>).</p>
        </sec>
        <sec>
          <title>Deep Learning</title>
          <p>We implemented an end-to-end deep neural network architecture based on convolutional neural networks that takes as input the full note and outputs its probability of being related to <italic>acute LBP</italic> (ie, <italic>ConvNet</italic> in the experiments). The first layer of the architecture maps the words to dense vector representations (ie, <italic>embeddings</italic>), which attempt to contextualize the semantic meaning of each word by creating a metric space where vectors of semantically similar words are close to each other. We applied word2vec with the skip-gram algorithm to the parsed notes [<xref ref-type="bibr" rid="ref56">56</xref>] to initialize the embedding of each word in the vocabulary. Word2vec is commonly used with EHRs to learn embeddings of medical concepts from structured data and clinical notes [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref57">57</xref>-<xref ref-type="bibr" rid="ref59">59</xref>].</p>
          <p>The embeddings were then fed to a convolutional neural network inspired by the model described by Kim [<xref ref-type="bibr" rid="ref60">60</xref>] and Liu et al [<xref ref-type="bibr" rid="ref42">42</xref>]. This architecture concatenates representations of the text at different levels of abstraction by essentially choosing the most relevant n-grams at each level. Here, we first applied a set of parallel 1-dimensional (1D) convolutions on the input sequence with kernel sizes ranging from 1 to 5, thus simulating n-grams with n=1, ..., 5. The outputs of each of these convolutions were then max-pooled over the whole sequence and concatenated to a (5 × <italic>d</italic>)-dimensional vector, where <italic>d</italic> is the number of 1D convolutional filters. This representation was then fed to sequences of fully connected layers, which learn the interactions between the text features, and finally to a sigmoid layer that outputs the prediction probability.</p>
          <p>The n-grams that are most relevant to the prediction, in this architecture, are those that activate the neurons in the max-pooling layer. Therefore, we used the log-odds that the n-gram contributes to the sigmoid decision function [<xref ref-type="bibr" rid="ref42">42</xref>] as an indication of how much each n-gram influences the decision.</p>
        </sec>
      </sec>
      <sec>
        <title>Evaluation Design</title>
        <p>We evaluated all the architectures using a 10-fold cross-validation experiment, with every note appearing in the test set only once. In each training set, we used a random 90/10 split to train and validate all the model configurations. As baseline, we also report the results obtained by considering as <italic>acute LBP</italic> all the notes associated with the <italic>Low back pain</italic> M54.5 ICD-10 code (ie, <italic>ICD-10</italic> in the results).</p>
        <sec>
          <title>Training Annotations</title>
          <p>We considered 2 different sets of annotations as gold standards to train the supervised models. In the first experiment, we used the manually curated annotations provided with the dataset from previous work [<xref ref-type="bibr" rid="ref21">21</xref>], whereas in the second experiment, we trained the models using the ICD-10 codes associated with each note encounter. Both experiments were evaluated using manual annotations. The rationale was to compare the feasibility of identifying acute LBP events when manual annotations are and are not available. We trained the classifier to output <italic>acute LBP</italic> versus <italic>other</italic> because the goal of the project was to identify clinical notes with acute LBP events rather than discriminate different facets of LBP events (eg, <italic>chronic LBP</italic> vs <italic>acute LBP</italic>).</p>
        </sec>
        <sec>
          <title>Metrics</title>
          <p>For all experiments, we report area under the receiver operating characteristic curve (AUC-ROC); precision, recall, and F score; and area under the precision-recall curve (AUC-PRC) [<xref ref-type="bibr" rid="ref61">61</xref>]. The ROC curve is a plot of true positive rate versus false positive rate found over the set of predictions. F score is the harmonic mean of classification precision and recall per annotation, where precision is the number of correct positive results divided by the number of all positive results, and recall is the number of correct positive results divided by the number of positive results that should have been returned. The PRC is a plot of precision and recall for different thresholds. The areas under the ROC and PR curves are computed by integrating the corresponding curves.</p>
        </sec>
        <sec>
          <title>Model Hyperparameters</title>
          <p>The model hyperparameters were empirically tuned using the validation sets to optimize the results with both training annotations. In the topic modeling method, we inferred topics using the whole training set of documents and 200 topics (derived using perplexity analysis). Although seemingly more intuitive, using only the notes associated with the M54.5 <italic>Low back pain</italic> ICD-10 code actually produced worse results. For each fold, the most relevant topics associated with acute LBP were manually reviewed and used to annotate the notes. In the deep learning architecture, we used embeddings with size 300 and full-length notes. We trained word2vec just on the clinical note dataset to initialize embeddings. Preinitializing the embeddings with a general-purpose corpus did not lead to any improvement. Each convolutional neural network had 200 filters and used a rectified linear unit (ReLu) activation function. We added 2 fully connected layers of size 600 following the convolutional neural networks with ReLu activations and batch normalization. Dropout values across the layers were all set to 0.5. The architecture was trained using cross-entropy loss with the Adam optimizer for 5 epochs and batch size 32 (learning rate=0.001). The classification thresholds for precision, recall, and F score were found by ranging the value from 0.1 to 1, with 0.1 increments, and retaining, for each model, the value leading to the best results on the validation set.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table1">Table 1</xref> and <xref rid="figure2" ref-type="fig">Figure 2</xref> show the average results of the 10-fold cross-validation experiment for all the models considered. The best results were obtained by the convolutional neural network-based architecture (ConvNet) when trained with the manual annotations. Although this is not entirely surprising given the success of deep learning for NLP when high-quality annotations and a large amount of data (ie, on the order of millions of training examples) are available, this was not certain in this domain where the training dataset was much smaller. As expected, the results obtained by the baseline and by training the models using the ICD-10 codes were not as good, confirming that the M54.5 ICD-10 code is not a sufficient indicator of acute LBP. TopicModel leads to similar performance but provides a more intuitive and potentially effective way for exploring the collection, extracting meaningful patterns that are related to acute LBP episodes. The most relevant topics included words defining acute LBP (eg, acute, low, back, pain, lbp, spasm, lifting, sciatica) and also included several medications that are usually prescribed to treat inflammation and pain (eg, Cyclobenzaprine, Flexeril, and Advil). Although this approach might not be robust enough for clinical application, a refined and manually curated version of TopicModel promises to allow an efficient prefiltering of clinical reports that can speed up the manual work required to annotate them. On the contrary, but as expected, WordSearch performed poorly as the condition is mentioned in too many different ways across the text, and simple keywords were not sufficient.</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>The classification results in identifying clinical notes with acute low back pain (LBP) episodes averaged over the 10-fold cross-validation experiment. We compared different supervised and unsupervised strategies: keyword search (WordSearch), topic modeling (TopicModel), logistic regression with bag of n-grams (BoN-LR) and manual features (FeatEng-LR), and deep learning (ConvNet). The supervised models (ie, BoN-LR, FeatEng-LR, and ConvNet) were trained using manual annotations or M54.5 International Classification of Diseases, 10th revision (ICD-10) codes. The ICD-10 baseline simply considered as acute LBP all the notes associated with the generic M54.5 Low back pain ICD-10 code.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="130"/>
          <col width="110"/>
          <col width="70"/>
          <col width="70"/>
          <col width="340"/>
          <col width="250"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Model</td>
              <td>Precision</td>
              <td>Recall</td>
              <td>F score</td>
              <td>Area under the receiver operating characteristic curve</td>
              <td>Area under the precision-recall curve</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="7">
                <bold>Baseline</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>ICD-10<sup>a</sup></td>
              <td>0.32</td>
              <td>0.68</td>
              <td>0.41</td>
              <td>0.81</td>
              <td>0.42</td>
            </tr>
            <tr valign="top">
              <td colspan="7">
                <bold>Unsupervised methods</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>WordSearch</td>
              <td>0.71</td>
              <td>0.03</td>
              <td>0.06</td>
              <td>0.52</td>
              <td>0.40</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>TopicModel</td>
              <td>0.44</td>
              <td>0.58</td>
              <td>0.50</td>
              <td>0.92</td>
              <td>0.46</td>
            </tr>
            <tr valign="top">
              <td colspan="7">
                <bold>Trained with the M54.5 ICD-10 code</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>BoN-LR<sup>b</sup></td>
              <td>0.50</td>
              <td>0.70</td>
              <td>0.59</td>
              <td>0.83</td>
              <td>0.42</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>FeatEng-LR<sup>c</sup></td>
              <td>0.47</td>
              <td>0.59</td>
              <td>0.52</td>
              <td>0.88</td>
              <td>0.41</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>ConvNet<sup>d</sup></td>
              <td>0.55</td>
              <td>0.68</td>
              <td>0.61</td>
              <td>0.89</td>
              <td>0.46</td>
            </tr>
            <tr valign="top">
              <td colspan="7">
                <bold>Trained with manual annotations</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>BoN-LR</td>
              <td>0.53</td>
              <td>0.64</td>
              <td>0.58</td>
              <td>0.93</td>
              <td>0.56</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>FeatEng-LR</td>
              <td>0.58</td>
              <td>0.66</td>
              <td>0.62</td>
              <td>0.93</td>
              <td>0.58</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>ConvNet</td>
              <td>0.65</td>
              <td>0.73</td>
              <td>0.70</td>
              <td>0.98</td>
              <td>0.72</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>ICD-10: International Classification of Diseases, 10th revision codes.</p>
          </fn>
          <fn id="table1fn2">
            <p><sup>b</sup>BoN-LR: logistic regression with bag of n-grams.</p>
          </fn>
          <fn id="table1fn3">
            <p><sup>c</sup>FeatEng-LR: logistic regression with feature engineering.</p>
          </fn>
          <fn id="table1fn4">
            <p><sup>d</sup>ConvNet: convolutional neural network-based architecture.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <fig id="figure2" position="float">
        <label>Figure 2</label>
        <caption>
          <p>Receiver operating characteristic and precision-recall curves obtained when using as training data for BoN-LR, FeatEng-LR and ConvNet the manual annotations (a) and the M54.5 ICD-10 codes (b). ConvNet trained using the manual annotations obtained the best results. In the absence of manual annotations to use for training, TopicModel worked better than methods trained using ICD-10 codes, which proved not to be a good indicator to identify acuity in low back pain episodes. BoN-LR: logistic regression with bag of n-grams; ConvNet: convolutional neural network-based architecture; FeatEng-LR: logistic regression with feature engineering; ICD-10: international classification of diseases, 10th revision; PR: precision-recall; ROC: receiver operating characteristic.</p>
        </caption>
        <graphic xlink:href="medinform_v8i2e16878_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows the classification results in terms of AUC-ROC and AUC-PRC when randomly subsampling the <italic>acute LBP</italic> manual annotations in the training set. We found that ConvNet always outperforms the other methods based on LR as well as TopicModel. In addition, we notice that using just 240 out of 800 (30.0%) manual annotations in the training set already leads to better results than using ICD-10 codes as training labels. This is a particularly interesting insight as it shows that only minimal manual work is required to achieve good classifications; these can then be further improved by adding automatically annotated notes to the model (after manual verification) and retraining.</p>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>Area under the receiver operating characteristic and precision-recall curves obtained when training the supervised models using random subsamples of the manual annotations. TopicModel is reported as reference baseline. ConvNet obtained satisfactory results when trained using fewer manually annotated documents, showing robustness and scalability to the gold standard. AUC-PRC: area under the precision-recall curve; AUC-ROC: area under the receiver operating characteristic curve; BoN-LR: logistic regression with bag of n-grams; ConvNet: convolutional neural network-based architecture; FeatEng-LR: logistic regression with feature engineering.</p>
        </caption>
        <graphic xlink:href="medinform_v8i2e16878_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p><xref rid="figure4" ref-type="fig">Figure 4</xref> highlights the distributions of the classification scores (predicted probability of the label <italic>acute LBP</italic>) derived by several supervised models (trained with manual annotations) and TopicModel. ConvNet shows a clear separation between acute LBP notes and the rest of the dataset. In particular, all acute LBP notes had scores greater than 0.2, with 81.6% (727/891) of them having scores greater than 0.5. On the contrary, only 347 controls had scores greater than 0.5, meaning that only a few notes were highly likely to be misclassified. Similarly, TopicModel had no controls with scores greater than 0.7, and all acute LBP notes had scores greater than 0.2.</p>
      <fig id="figure4" position="float">
        <label>Figure 4</label>
        <caption>
          <p>Representation of the probability distribution of the scores obtained by BoN-LR, FeatEng-LR, ConvNet, and TopicModel. ConvNet led to a good separation between acute low back pain clinical notes and all the other documents. In other cases, such separation is not as clear, explaining the worse classification results obtained by those models. BoN-LR: logistic regression with bag of n-grams; ConvNet: convolutional neural network-based architecture; FeatEng-LR: logistic regression with feature engineering.</p>
        </caption>
        <graphic xlink:href="medinform_v8i2e16878_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>Finally, <xref ref-type="table" rid="table2">Table 2</xref> summarizes some of the n-grams driving the <italic>acute LBP</italic> predictions obtained by ConvNet (trained with manual annotations) across the experiments. Although some of these are obvious and refer to the disease itself (eg, “acute lbp”), others refer to medications (eg, “prescribed muscle relaxant” and “flexeril”) and recommendations (eg, “rtw full duty quick”). Given their clinical meaning and relevance, all these patterns can be further analyzed and reviewed to potentially drive the development of guidelines for, for example, treatment and RTW options.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Examples of n-grams that were relevant in identifying acute low back pain notes when using convolutional neural network-based architecture trained with manual annotations. The n-grams’ relevance was determined by analyzing the neurons of the convolutional neural networks activating the max-pooling layers and their log-odds to contribute to the final output. Log-odds were filtered per note and then averaged over all the notes and evaluation folds.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="500"/>
          <col width="500"/>
          <thead>
            <tr valign="top">
              <td>Type</td>
              <td>Acute LBP<sup>a</sup>-related predictive n-grams</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Diagnosis</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Muscle spasm lower back</p>
                  </list-item>
                  <list-item>
                    <p>Acute LBP flare</p>
                  </list-item>
                  <list-item>
                    <p>Been having acute back pain</p>
                  </list-item>
                  <list-item>
                    <p>Acute midline LBP</p>
                  </list-item>
                  <list-item>
                    <p>Sports acute bilateral LBP</p>
                  </list-item>
                  <list-item>
                    <p>Acute low back pain</p>
                  </list-item>
                  <list-item>
                    <p>Acute LBP</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>Related conditions</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Gait abnormality</p>
                  </list-item>
                  <list-item>
                    <p>Showed significant disk herniation</p>
                  </list-item>
                  <list-item>
                    <p>Intermittent sciatica</p>
                  </list-item>
                  <list-item>
                    <p>Spinal stenosis</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>Medications</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Back pain flare prescribed flexeril</p>
                  </list-item>
                  <list-item>
                    <p>Cyclobenzaprine</p>
                  </list-item>
                  <list-item>
                    <p>Flexeril</p>
                  </list-item>
                  <list-item>
                    <p>Naproxen for acute low back</p>
                  </list-item>
                  <list-item>
                    <p>Prescribed muscle relaxant</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>Recommendations</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Back brace for back pain</p>
                  </list-item>
                  <list-item>
                    <p>Obtain lumbar spine MRI<sup>b</sup></p>
                  </list-item>
                  <list-item>
                    <p>Recommendation RTW<sup>c</sup> visit</p>
                  </list-item>
                  <list-item>
                    <p>RTW full duty quick</p>
                  </list-item>
                </list>
              </td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>LBP: low back pain.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>MRI: magnetic resonance imaging.</p>
          </fn>
          <fn id="table2fn3">
            <p><sup>c</sup>RTW: return-to-work.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this work, we evaluated the use of several machine learning approaches to identify acute LBP episodes in free-text clinical notes to better personalize the treatment and management of this condition in primary care. The experimental results showed that it is possible to extract acute LBP episodes with promising precision, especially when at least some manually curated annotations are available. In this scenario, ConvNet, a deep learning architecture based on convolutional neural networks, significantly outperformed other shallow techniques based on BoN and LR, opening the possibility to boost performances using more complex architectures from current research in the NLP community. The implemented deep architecture also provides an easy mechanism to explain the predictions, leading to informed decision support based on model transparency [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>] and the identification of meaningful patterns that can drive clinical decision making. If no annotations are available, experiments showed that the use of topic modeling is preferred to training a classifier using only the M54.5 ICD-10 codes (ie, <italic>Low back pain</italic>) associated with the clinical note encounter, which proved to be a poor indicator to discriminate LBP episodes. In addition, the topics identified can serve as an intuitive tool to inform guidelines and recommendations, to prefilter the documents, and to reduce the manual work required to annotate the notes. The proposed framework is inherently domain agnostic and does not require any manual supervision to identify relevant features from the free text. Therefore, it can be leveraged in other musculoskeletal condition domains where acuity is not expressed in the ICD-10 diagnostic codes, such as knee, elbow, and shoulder pain.</p>
      </sec>
      <sec>
        <title>Potential Applications</title>
        <p>Medical care decisions are often based on heuristics and manually derived rule-based models constructed on previous knowledge and expertise [<xref ref-type="bibr" rid="ref64">64</xref>]. Cognitive biases and personality traits, such as aversion to risk or ambiguity, overconfidence, and the anchoring effect, may lead to diagnostic inaccuracies and medical errors, resulting in mismanagement or inadequate utilization of resources [<xref ref-type="bibr" rid="ref65">65</xref>]. In the LBP domain, this may lead to delays in finding the right therapy and assisting patients in the return to normal activities, increased risk of transitioning the condition from acute to chronic, discomfort for patients, and increased economic burdens on clinical facilities to adequately treat and manage this patient population. Deriving data-driven guidelines for treatment recommendations can help in reducing these cognitive biases and personality traits, leading to more consistent and accurate decisions. In this scenario, the proposed frameworks integrate seamlessly with the RTW tool proposed by Cruz et al [<xref ref-type="bibr" rid="ref21">21</xref>] by including acuity-relevant information in the clinical notes and addressing 1 of the limitations of that study (ie, recommending the RTW tool at the point of care by accurately identifying the condition as acute LBP). Similarly, an understanding of the patterns driving the predictions can lead to the development of new and improved treatment strategies for various types of injuries, which can be presented to the clinicians at the time of patient encounter to help them with better management of the condition. Although physicians will continue to have autonomy in determining optimal care pathways for their patients, the recommendations provided by the supporting framework will be useful to systematize and support their activities within the realm of the busy clinical practice. 
Posterior analysis of the clinical notes to infer acute LBP episodes can also help in assigning the proper diagnostic and billing codes for the encounter. In a foreseeable future scenario where clinical observations are automatically transcribed via voice and EHRs are processed in real time, an automated tool that identifies acuity information could also improve the accuracy of diagnosis and billing in real time, with no need to wait for posterior evaluations.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This work evaluated the feasibility of using machine learning to identify acute LBP episodes in clinical notes. Therefore, we compared different types of models (shallow vs deep) and learning frameworks (unsupervised vs supervised) to identify the best directions for implementation and deployment in real clinical settings. Although several of the architectures evaluated in this work obtained promising results, more sophisticated models are likely to improve these performances, especially in the deep learning domain. For example, algorithms based on attention models [<xref ref-type="bibr" rid="ref66">66</xref>], Bidirectional Encoder Representations from Transformers [<xref ref-type="bibr" rid="ref67">67</xref>], or XLNet [<xref ref-type="bibr" rid="ref68">68</xref>] have shown encouraging results on similar NLP tasks and are likely to obtain better results in this domain as well. In this work, we only focused on processing clinical notes; however, embedding structured EHR data, especially medications, imaging studies, and/or laboratory tests, into the method should improve the results.</p>
        <p>The dataset of clinical notes used in this study originated from a geographically diverse set of primary care clinics serving the New York City population across the city’s metro area over a limited period (ie, 2016 to 2018). Providers were enrolled and randomized into the study on a rolling basis, with the number of encounters for LBP varying for each individual provider, based on his/her own practice. The majority of the PCPs were assistant professors serving on the front lines. No specialists were included in the initial study, as the pilot project was only geared toward the PCPs. Consequently, the results of this study might not be applicable to specialty care practice.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>The classification of LBP episodes as acute or chronic at the point of care level within primary care practice is imperative for an RTW tool to be effectively used to render evidence-based guidelines. At this time, we plan to classify a large set of notes, derive patterns related to acute LBP, and extend the tool proposed by Cruz et al [<xref ref-type="bibr" rid="ref21">21</xref>] according to them. We further plan to identify cases where the RTW tool can be easily deployed based on EHR integration in the clinical domain. We will also begin to address some of the methodological limitations of this study to optimize performance and evaluate its generalizability outside primary care. Finally, we aim to evaluate the feasibility of this type of approach for other musculoskeletal conditions, in particular, shoulder and knee pain.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study demonstrates the feasibility of using machine learning to automatically identify acute LBP episodes from clinical reports using only unstructured free-text data. In particular, manually annotating a set of notes to use as a gold standard can lead to effective results, especially when using deep learning. Topic modeling can help in speeding up the annotation process, initiating an iterative process where initial predictions are validated and then used to refine and optimize the model. This approach provides a generalizable framework for learning to differentiate disease acuity in primary care, which can more accurately and specifically guide the diagnosis and treatment of LBP. It also provides a clear path toward improving the accuracy of coding and billing of clinical encounters for LBP.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">1D</term>
          <def>
            <p>1 dimensional</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUC-PRC</term>
          <def>
            <p>area under the precision-recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AUC-ROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BoN</term>
          <def>
            <p>bag of n-grams</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ConvNet</term>
          <def>
            <p>convolutional neural network-based architecture</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ICD-10</term>
          <def>
            <p>International Classification of Diseases, 10th revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">LASSO</term>
          <def>
            <p>least absolute shrinkage and selection operator</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LBP</term>
          <def>
            <p>low back pain</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">PCP</term>
          <def>
            <p>primary care provider</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">ReLu</term>
          <def>
            <p>rectified linear unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">RTW</term>
          <def>
            <p>return-to-work</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">TF-IDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>IN and LC would like to thank the Pilot Projects Research Training Program of the New York and New Jersey Education and Research Center, National Institute for Occupational Safety and Health, for their funding (grant #T42 OH 008422). RM is grateful for the support from the Hasso Plattner Foundation and a courtesy GPU donation from NVIDIA.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>RM and IN initiated the idea and wrote the manuscript. IN collected the data and provided clinical support. RM conducted the research and the experimental evaluation. BP advised on evaluation strategies and refined the manuscript. BG, HL, and LC refined the manuscript. JD supported the research. All the authors edited and reviewed the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Centers for Disease Control and Prevention (CDC)</collab>
          </person-group>
          <article-title>Prevalence and most common causes of disability among adults—United States, 2005</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2009</year>
          <month>05</month>
          <day>1</day>
          <volume>58</volume>
          <issue>16</issue>
          <fpage>421</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/mmwr/preview/mmwrhtml/mm5816a2.htm"/>
          </comment>
          <pub-id pub-id-type="medline">19407734</pub-id>
          <pub-id pub-id-type="pii">mm5816a2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ricci</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Chee</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Leotta</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Foley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hochberg</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>Back pain exacerbations and lost productive time costs in United States workers</article-title>
          <source>Spine (Phila Pa 1976)</source>
          <year>2006</year>
          <month>12</month>
          <day>15</day>
          <volume>31</volume>
          <issue>26</issue>
          <fpage>3052</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1097/01.brs.0000249521.61813.aa</pub-id>
          <pub-id pub-id-type="medline">17173003</pub-id>
          <pub-id pub-id-type="pii">00007632-200612150-00012</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luckhaupt</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Dahlhamer</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzales</surname>
              <given-names>GT</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Groenewold</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sweeney</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>BW</given-names>
            </name>
          </person-group>
          <article-title>Prevalence, recognition of work-relatedness, and effect on work of low back pain among US workers</article-title>
          <source>Ann Intern Med</source>
          <year>2019</year>
          <month>05</month>
          <day>14</day>
          <pub-id pub-id-type="doi">10.7326/M18-3602</pub-id>
          <pub-id pub-id-type="medline">31083729</pub-id>
          <pub-id pub-id-type="pii">2733500</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <source>BMUS: The Burden of Musculoskeletal Diseases in the United States</source>
          <access-date>2019-04-22</access-date>
          <comment>Health Care Utilization and Economic Cost <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.boneandjointburden.org/2014-report/if0/health-care-utilization-and-economic-cost">https://www.boneandjointburden.org/2014-report/if0/health-care-utilization-and-economic-cost</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fairbank</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gwilym</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>France</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Daffner</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Dettori</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hermsmeyer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Andersson</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The role of classification of chronic low back pain</article-title>
          <source>Spine (Phila Pa 1976)</source>
          <year>2011</year>
          <month>10</month>
          <day>1</day>
          <volume>36</volume>
          <issue>21 Suppl</issue>
          <fpage>S19</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1097/BRS.0b013e31822ef72c</pub-id>
          <pub-id pub-id-type="medline">21952188</pub-id>
          <pub-id pub-id-type="pii">00007632-201110011-00003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiner</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Bonino</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Low back pain in older adults: are we utilizing healthcare resources wisely?</article-title>
          <source>Pain Med</source>
          <year>2006</year>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>143</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1526-4637.2006.00112.x</pub-id>
          <pub-id pub-id-type="medline">16634727</pub-id>
          <pub-id pub-id-type="pii">PME112</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedly</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Deyo</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Increases in lumbosacral injections in the Medicare population: 1994 to 2001</article-title>
          <source>Spine (Phila Pa 1976)</source>
          <year>2007</year>
          <month>07</month>
          <day>15</day>
          <volume>32</volume>
          <issue>16</issue>
          <fpage>1754</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1097/BRS.0b013e3180b9f96e</pub-id>
          <pub-id pub-id-type="medline">17632396</pub-id>
          <pub-id pub-id-type="pii">00007632-200707150-00010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deyo</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Mirza</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Trends and variations in the use of spine surgery</article-title>
          <source>Clin Orthop Relat Res</source>
          <year>2006</year>
          <month>02</month>
          <volume>443</volume>
          <fpage>139</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1097/01.blo.0000198726.62514.75</pub-id>
          <pub-id pub-id-type="medline">16462438</pub-id>
          <pub-id pub-id-type="pii">00003086-200602000-00023</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deyo</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Nachemson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mirza</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Spinal-fusion surgery - the case for restraint</article-title>
          <source>N Engl J Med</source>
          <year>2004</year>
          <month>02</month>
          <day>12</day>
          <volume>350</volume>
          <issue>7</issue>
          <fpage>722</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMsb031771</pub-id>
          <pub-id pub-id-type="medline">14960750</pub-id>
          <pub-id pub-id-type="pii">350/7/722</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ballantyne</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Opioids for the treatment of chronic pain: mistakes made, lessons learned, and future directions</article-title>
          <source>Anesth Analg</source>
          <year>2017</year>
          <month>11</month>
          <volume>125</volume>
          <issue>5</issue>
          <fpage>1769</fpage>
          <lpage>78</lpage>
          <pub-id pub-id-type="doi">10.1213/ANE.0000000000002500</pub-id>
          <pub-id pub-id-type="medline">29049121</pub-id>
          <pub-id pub-id-type="pii">00000539-201711000-00044</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Pietrobon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hey</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Patterns and trends in opioid use among individuals with back pain in the United States</article-title>
          <source>Spine (Phila Pa 1976)</source>
          <year>2004</year>
          <month>04</month>
          <day>15</day>
          <volume>29</volume>
          <issue>8</issue>
          <fpage>884</fpage>
          <lpage>90; discussion 891</lpage>
          <pub-id pub-id-type="doi">10.1097/00007632-200404150-00012</pub-id>
          <pub-id pub-id-type="medline">15082989</pub-id>
          <pub-id pub-id-type="pii">00007632-200404150-00012</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Malmivaara</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Häkkinen</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Aro</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Heinrichs</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Koskenniemi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kuosma</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lappi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Paloheimo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Servo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vaaranen</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>The treatment of acute low back pain—bed rest, exercises, or ordinary activity?</article-title>
          <source>N Engl J Med</source>
          <year>1995</year>
          <month>02</month>
          <day>9</day>
          <volume>332</volume>
          <issue>6</issue>
          <fpage>351</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJM199502093320602</pub-id>
          <pub-id pub-id-type="medline">7823996</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <source>ICD-10 Data</source>
          <access-date>2020-01-07</access-date>
          <comment>2020 ICD-10-CM Diagnosis Code M54.5: Low back pain <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.icd10data.com/ICD10CM/Codes/M00-M99/M50-M54/M54-/M54.5">https://www.icd10data.com/ICD10CM/Codes/M00-M99/M50-M54/M54-/M54.5</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Laslett</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Juhl</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Clinical classification in low back pain: best-evidence diagnostic rules based on systematic reviews</article-title>
          <source>BMC Musculoskelet Disord</source>
          <year>2017</year>
          <month>05</month>
          <day>12</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>188</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmusculoskeletdisord.biomedcentral.com/articles/10.1186/s12891-017-1549-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12891-017-1549-6</pub-id>
          <pub-id pub-id-type="medline">28499364</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12891-017-1549-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC5429540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Casazza</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>Diagnosis and treatment of acute low back pain</article-title>
          <source>Am Fam Physician</source>
          <year>2012</year>
          <month>02</month>
          <day>15</day>
          <volume>85</volume>
          <issue>4</issue>
          <fpage>343</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.aafp.org/link_out?pmid=22335313"/>
          </comment>
          <pub-id pub-id-type="medline">22335313</pub-id>
          <pub-id pub-id-type="pii">d10227</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feuerstein</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>GD</given-names>
            </name>
          </person-group>
          <article-title>National trends in nonoperative care for nonspecific back pain</article-title>
          <source>Spine J</source>
          <year>2004</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>56</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.1016/j.spinee.2003.08.003</pub-id>
          <pub-id pub-id-type="medline">14749194</pub-id>
          <pub-id pub-id-type="pii">S1529943003004637</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kessler</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>DF</given-names>
            </name>
            <name name-style="western">
              <surname>van Rompay</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Walters</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Wilkey</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Kaptchuk</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Eisenberg</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Long-term trends in the use of complementary and alternative medical therapies in the United States</article-title>
          <source>Ann Intern Med</source>
          <year>2001</year>
          <month>08</month>
          <day>21</day>
          <volume>135</volume>
          <issue>4</issue>
          <fpage>262</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-135-4-200108210-00011</pub-id>
          <pub-id pub-id-type="medline">11511141</pub-id>
          <pub-id pub-id-type="pii">200108210-00011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>BI</given-names>
            </name>
            <name name-style="western">
              <surname>Deyo</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Mirza</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Comstock</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Hollingworth</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sullivan</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>Expenditures and health status among adults with back and neck problems</article-title>
          <source>J Am Med Assoc</source>
          <year>2008</year>
          <month>02</month>
          <day>13</day>
          <volume>299</volume>
          <issue>6</issue>
          <fpage>656</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.299.6.656</pub-id>
          <pub-id pub-id-type="medline">18270354</pub-id>
          <pub-id pub-id-type="pii">299/6/656</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Freburger</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Holmes</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Agans</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Jackman</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Darter</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Castel</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Kalsbeek</surname>
              <given-names>WD</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>TS</given-names>
            </name>
          </person-group>
          <article-title>The rising prevalence of chronic low back pain</article-title>
          <source>Arch Intern Med</source>
          <year>2009</year>
          <month>02</month>
          <day>9</day>
          <volume>169</volume>
          <issue>3</issue>
          <fpage>251</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19204216"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/archinternmed.2008.543</pub-id>
          <pub-id pub-id-type="medline">19204216</pub-id>
          <pub-id pub-id-type="pii">169/3/251</pub-id>
          <pub-id pub-id-type="pmcid">PMC4339077</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rossignol</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Abenhaim</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Séguin</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Neveu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Collet</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Ducruet</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Shapiro</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Coordination of primary health care for back pain. A randomized controlled trial</article-title>
          <source>Spine (Phila Pa 1976)</source>
          <year>2000</year>
          <month>01</month>
          <day>15</day>
          <volume>25</volume>
          <issue>2</issue>
          <fpage>251</fpage>
          <lpage>8; discussion 258</lpage>
          <pub-id pub-id-type="doi">10.1097/00007632-200001150-00018</pub-id>
          <pub-id pub-id-type="medline">10685491</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cruz</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Alamgir</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nabeel</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Development of a return to work tool for primary care providers for patients with low back pain: A pilot study</article-title>
          <source>J Family Med Prim Care</source>
          <year>2018</year>
          <volume>7</volume>
          <issue>6</issue>
          <fpage>1185</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jfmpc.com/article.asp?issn=2249-4863;year=2018;volume=7;issue=6;spage=1185;epage=1192;aulast=Cruz"/>
          </comment>
          <pub-id pub-id-type="doi">10.4103/jfmpc.jfmpc_262_18</pub-id>
          <pub-id pub-id-type="medline">30613495</pub-id>
          <pub-id pub-id-type="pii">JFMPC-7-1185</pub-id>
          <pub-id pub-id-type="pmcid">PMC6293894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Owens</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Hegmann</surname>
              <given-names>KT</given-names>
            </name>
            <name name-style="western">
              <surname>Thiese</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>Impacts of adherence to evidence-based medicine guidelines for the management of acute low back pain on costs of worker's compensation claims</article-title>
          <source>J Occup Environ Med</source>
          <year>2019</year>
          <month>06</month>
          <volume>61</volume>
          <issue>6</issue>
          <fpage>445</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1097/JOM.0000000000001593</pub-id>
          <pub-id pub-id-type="medline">31167221</pub-id>
          <pub-id pub-id-type="pii">00043764-201906000-00001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <source>Coding Strategies</source>
          <access-date>2020-01-07</access-date>
          <comment>Reporting Pain in ICD-10-CM <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.codingstrategies.com/news/reporting-pain-icd-10-cm">https://www.codingstrategies.com/news/reporting-pain-icd-10-cm</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gross</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Armijo-Olivo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Williams-Whitt</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>NT</given-names>
            </name>
            <name name-style="western">
              <surname>Hartvigsen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ha</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Woodhouse</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Steenstra</surname>
              <given-names>IA</given-names>
            </name>
          </person-group>
          <article-title>Clinical decision support tools for selecting interventions for patients with disabling musculoskeletal disorders: a scoping review</article-title>
          <source>J Occup Rehabil</source>
          <year>2016</year>
          <month>09</month>
          <volume>26</volume>
          <issue>3</issue>
          <fpage>286</fpage>
          <lpage>318</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26667939"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10926-015-9614-1</pub-id>
          <pub-id pub-id-type="medline">26667939</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10926-015-9614-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4967425</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <month>05</month>
          <day>2</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
          <pub-id pub-id-type="medline">22549152</pub-id>
          <pub-id pub-id-type="pii">nrg3208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>The next generation of precision medicine: observational studies, electronic health records, biobanks and continuous monitoring</article-title>
          <source>Hum Mol Genet</source>
          <year>2018</year>
          <month>05</month>
          <day>1</day>
          <volume>27</volume>
          <issue>R1</issue>
          <fpage>R56</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1093/hmg/ddy114</pub-id>
          <pub-id pub-id-type="medline">29659828</pub-id>
          <pub-id pub-id-type="pii">4969371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>LeCun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>05</month>
          <day>28</day>
          <volume>521</volume>
          <issue>7553</issue>
          <fpage>436</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
          <pub-id pub-id-type="medline">26017442</pub-id>
          <pub-id pub-id-type="pii">nature14539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kidd</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Deep Patient: an unsupervised representation to predict the future of patients from the electronic health records</article-title>
          <source>Sci Rep</source>
          <year>2016</year>
          <month>05</month>
          <day>17</day>
          <volume>6</volume>
          <fpage>26094</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/srep26094"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep26094</pub-id>
          <pub-id pub-id-type="medline">27185194</pub-id>
          <pub-id pub-id-type="pii">srep26094</pub-id>
          <pub-id pub-id-type="pmcid">PMC4869115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for healthcare: review, opportunities and challenges</article-title>
          <source>Brief Bioinform</source>
          <year>2018</year>
          <month>11</month>
          <day>27</day>
          <volume>19</volume>
          <issue>6</issue>
          <fpage>1236</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28481991"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbx044</pub-id>
          <pub-id pub-id-type="medline">28481991</pub-id>
          <pub-id pub-id-type="pii">3800524</pub-id>
          <pub-id pub-id-type="pmcid">PMC6455466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Doctor AI: predicting clinical events via recurrent neural networks</article-title>
          <source>JMLR Workshop Conf Proc</source>
          <year>2016</year>
          <month>08</month>
          <volume>56</volume>
          <fpage>301</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28286600"/>
          </comment>
          <pub-id pub-id-type="medline">28286600</pub-id>
          <pub-id pub-id-type="pmcid">PMC5341604</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Opportunities and challenges in developing deep learning models using electronic health records data: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>10</month>
          <day>1</day>
          <volume>25</volume>
          <issue>10</issue>
          <fpage>1419</fpage>
          <lpage>28</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29893864"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy068</pub-id>
          <pub-id pub-id-type="medline">29893864</pub-id>
          <pub-id pub-id-type="pii">5035024</pub-id>
          <pub-id pub-id-type="pmcid">PMC6188527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajkomar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oren</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hajaj</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sundberg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duggan</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Irvine</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Litsch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mossin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tansuwan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wexler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ludwig</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Volchenboum</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Madabushi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Howell</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Scalable and accurate deep learning with electronic health records</article-title>
          <source>NPJ Digit Med</source>
          <year>2018</year>
          <volume>1</volume>
          <fpage>18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31304302"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-018-0029-1</pub-id>
          <pub-id pub-id-type="medline">31304302</pub-id>
          <pub-id pub-id-type="pii">29</pub-id>
          <pub-id pub-id-type="pmcid">PMC6550175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Ferro</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Crestani</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Moens</surname>
              <given-names>MF</given-names>
            </name>
          </person-group>
          <article-title>Deep learning to predict patient future diseases from the electronic health records</article-title>
          <source>Advances in Information Retrieval</source>
          <year>2016</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>768</fpage>
          <lpage>74</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A primer on neural network models for Natural Language Processing</article-title>
          <source>J Artif Intell Res</source>
          <year>2016</year>
          <volume>57</volume>
          <issue>7</issue>
          <fpage>345</fpage>
          <lpage>420</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.4992</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Schuster</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
            <name name-style="western">
              <surname>Norouzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Macherey</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Krikun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Macherey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Klingner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gouws</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kato</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kudo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kazawa</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kurian</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Patil</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Riesa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rudnick</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vinyals</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2016</year>
          <access-date>2020-01-07</access-date>
          <comment>Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1609.08144">http://arxiv.org/abs/1609.08144</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kannan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kurach</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ravi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kaufmann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tomkins</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Miklos</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lukács</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ganea</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ramavajjala</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Smart Reply: Automated Response Suggestion for Email</article-title>
          <source>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2016</year>
          <conf-name>KDD'16</conf-name>
          <conf-date>August 13-17, 2016</conf-date>
          <conf-loc>San Francisco, United States</conf-loc>
          <publisher-loc>San Francisco, CA</publisher-loc>
          <publisher-name>ACM</publisher-name>
          <fpage>955</fpage>
          <lpage>64</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vinyals</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Toshev</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Erhan</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Show and Tell: A Neural Image Caption Generator</article-title>
          <source>Proceedings of the 2015 IEEE Conference on Computer Vision and Pattern Recognition</source>
          <year>2015</year>
          <conf-name>CVPR'15</conf-name>
          <conf-date>June 7-12, 2015</conf-date>
          <conf-loc>Boston, MA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Vinyals_Show_and_Tell_2015_CVPR_paper.pdf">https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Vinyals_Show_and_Tell_2015_CVPR_paper.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhalishahi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rinaldi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Natural Language Processing of clinical notes on chronic diseases: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>04</month>
          <day>27</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12239</pub-id>
          <pub-id pub-id-type="medline">31066697</pub-id>
          <pub-id pub-id-type="pii">v7i2e12239</pub-id>
          <pub-id pub-id-type="pmcid">PMC6528438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baumel</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nassour-Kassis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2017</year>
          <access-date>2020-01-07</access-date>
          <comment>Multi-Label Classification of Patient Notes a Case Study on ICD Code Assignment<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1709.09587">http://arxiv.org/abs/1709.09587</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mullenbach</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wiegreffe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Eisenstein</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2018</year>
          <access-date>2020-01-07</access-date>
          <comment>Explainable Prediction of Medical Codes from Clinical Text<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1802.05695">http://arxiv.org/abs/1802.05695</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>EP</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2017</year>
          <access-date>2020-01-07</access-date>
          <comment>Towards Automated ICD Coding Using Deep Learning<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1711.04075">http://arxiv.org/abs/1711.04075</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Razavian</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2018</year>
          <access-date>2020-01-07</access-date>
          <comment>Deep EHR: Chronic Disease Prediction Using Medical Notes<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1808.04928">http://arxiv.org/abs/1808.04928</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ramanathan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tourassi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Angelov</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Manolopoulos</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Iliadis</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vellasco</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Multi-task deep neural networks for automated extraction of primary site and laterality information from cancer pathology reports</article-title>
          <source>Advances in Big Data</source>
          <year>2016</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>195</fpage>
          <lpage>204</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>JX</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fearn</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Tourassi</surname>
              <given-names>GD</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for automated extraction of primary sites from cancer pathology reports</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2018</year>
          <month>01</month>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>244</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2017.2700722</pub-id>
          <pub-id pub-id-type="medline">28475069</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobs</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Marques</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Oates</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Kamen</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Obeid</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Word2Vec inversion and traditional text classifiers for phenotyping lupus</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>08</month>
          <day>22</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>126</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0518-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0518-1</pub-id>
          <pub-id pub-id-type="medline">28830409</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0518-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5568290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gehrmann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dernoncourt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carlson</surname>
              <given-names>ET</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Welt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Foote</surname>
              <given-names>J</given-names>
              <suffix>Jr</suffix>
            </name>
            <name name-style="western">
              <surname>Moseley</surname>
              <given-names>ET</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Tyler</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2017</year>
          <access-date>2020-01-07</access-date>
          <comment>Comparing Rule-Based and Deep Learning Models for Patient Phenotyping<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1703.08705">http://arxiv.org/abs/1703.08705</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor-Vaisey</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Translating guidelines into practice. A systematic review of theoretic concepts, practical experience and research evidence in the adoption of clinical practice guidelines</article-title>
          <source>Can Med Assoc J</source>
          <year>1997</year>
          <month>08</month>
          <day>15</day>
          <volume>157</volume>
          <issue>4</issue>
          <fpage>408</fpage>
          <lpage>16</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cmaj.ca/cgi/pmidlookup?view=reprint&amp;pmid=9275952"/>
          </comment>
          <pub-id pub-id-type="medline">9275952</pub-id>
          <pub-id pub-id-type="pmcid">PMC1227916</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Navarro</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A guided tour to approximate string matching</article-title>
          <source>ACM Comput Surv</source>
          <year>2001</year>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://paperpile.com/b/oHrtII/BRNpt"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/375360.375365</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Bridewell</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hanbury</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>GF</given-names>
            </name>
            <name name-style="western">
              <surname>Buchanan</surname>
              <given-names>BG</given-names>
            </name>
          </person-group>
          <article-title>A simple algorithm for identifying negated findings and diseases in discharge summaries</article-title>
          <source>J Biomed Inform</source>
          <year>2001</year>
          <month>10</month>
          <volume>34</volume>
          <issue>5</issue>
          <fpage>301</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(01)91029-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1006/jbin.2001.1029</pub-id>
          <pub-id pub-id-type="medline">12123149</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(01)91029-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Probabilistic topic models</article-title>
          <source>Commun ACM</source>
          <year>2012</year>
          <volume>55</volume>
          <issue>4</issue>
          <fpage>77</fpage>
          <pub-id pub-id-type="doi">10.1145/2133806.2133826</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>MI</given-names>
            </name>
          </person-group>
          <article-title>Latent Dirichlet Allocation</article-title>
          <source>J Mach Learn Res</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://paperpile.com/b/oHrtII/v6N1E"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Case-based reasoning using electronic health records efficiently identifies eligible patients for clinical trials</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>04</month>
          <volume>22</volume>
          <issue>e1</issue>
          <fpage>e141</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25769682"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocu050</pub-id>
          <pub-id pub-id-type="medline">25769682</pub-id>
          <pub-id pub-id-type="pii">ocu050</pub-id>
          <pub-id pub-id-type="pmcid">PMC4428438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perotte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bartlett</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Hierarchically Supervised Latent Dirichlet Allocation</article-title>
          <source>Proceedings of the Advances in Neural Information Processing Systems 24</source>
          <year>2011</year>
          <conf-name>NIPS'11</conf-name>
          <conf-date>December 12-17, 2011</conf-date>
          <conf-loc>Granada, Spain</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Curran Associates, Inc</publisher-name>
          <fpage>2609</fpage>
          <lpage>17</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://papers.nips.cc/paper/4313-hierarchically-supervised-latent-dirichlet-allocation.pdf">https://papers.nips.cc/paper/4313-hierarchically-supervised-latent-dirichlet-allocation.pdf</ext-link>
          </comment>
          <pub-id pub-id-type="doi">10.1108/09504120310455975</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Lou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Karaletsos</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Crosbie</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gardos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Artz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rätsch</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>bioRxiv - the preprint server for Biology</source>
          <year>2016</year>
          <access-date>2020-01-07</access-date>
          <comment>An Empirical Analysis of Topic Modeling for Mining Cancer Clinical Notes<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.biorxiv.org/content/10.1101/062307v1">https://www.biorxiv.org/content/10.1101/062307v1</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Aviram</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Redundancy-aware topic modeling for patient record notes</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>e87555</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0087555"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0087555</pub-id>
          <pub-id pub-id-type="medline">24551060</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-38704</pub-id>
          <pub-id pub-id-type="pmcid">PMC3923754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed Representations of Words and Phrases and their Compositionality</article-title>
          <source>Proceedings of the Advances in Neural Information Processing Systems 26</source>
          <year>2013</year>
          <conf-name>NIPS'13</conf-name>
          <conf-date>December 5-10, 2013</conf-date>
          <conf-loc>Lake Tahoe, Nevada, USA</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Curran Associates, Inc</publisher-name>
          <fpage>3111</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf">https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Shameer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Automated disease cohort selection using word embeddings from Electronic Health Records</article-title>
          <source>Pac Symp Biocomput</source>
          <year>2018</year>
          <volume>23</volume>
          <fpage>145</fpage>
          <lpage>56</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://psb.stanford.edu/psb-online/proceedings/psb18/abstracts/2018_p145.html"/>
          </comment>
          <pub-id pub-id-type="medline">29218877</pub-id>
          <pub-id pub-id-type="pii">9789813235533_0014</pub-id>
          <pub-id pub-id-type="pmcid">PMC5788312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Learning low-dimensional representations of medical concepts</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2016</year>
          <volume>2016</volume>
          <fpage>41</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27570647"/>
          </comment>
          <pub-id pub-id-type="medline">27570647</pub-id>
          <pub-id pub-id-type="pmcid">PMC5001761</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kingsbury</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A comparison of word embeddings for the biomedical natural language processing</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>11</month>
          <volume>87</volume>
          <fpage>12</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30182-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.09.008</pub-id>
          <pub-id pub-id-type="medline">30217670</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30182-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6585427</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2014</year>
          <access-date>2020-01-07</access-date>
          <comment>Convolutional Neural Networks for Sentence Classification<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1408.5882">http://arxiv.org/abs/1408.5882</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baeza-Yates</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeiro-Neto</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>Modern Information Retrieval: The Concepts and Technology behind Search. Second Edition</source>
          <year>2011</year>
          <publisher-loc>Boston, MA</publisher-loc>
          <publisher-name>Addison-Wesley Professional</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holzinger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Biemann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pattichis</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Kell</surname>
              <given-names>DB</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2017</year>
          <access-date>2020-01-07</access-date>
          <comment>What Do We Need to Build Explainable AI Systems for the Medical Domain?<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1712.09923">http://arxiv.org/abs/1712.09923</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipton</surname>
              <given-names>ZC</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2016</year>
          <access-date>2020-01-07</access-date>
          <comment>The Mythos of Model Interpretability<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1606.03490">http://arxiv.org/abs/1606.03490</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marewski</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Gigerenzer</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Heuristic decision making in medicine</article-title>
          <source>Dialogues Clin Neurosci</source>
          <year>2012</year>
          <month>03</month>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>77</fpage>
          <lpage>89</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22577307"/>
          </comment>
          <pub-id pub-id-type="medline">22577307</pub-id>
          <pub-id pub-id-type="pmcid">PMC3341653</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saposnik</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Redelmeier</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ruff</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Tobler</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Cognitive biases associated with medical decisions: a systematic review</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2016</year>
          <month>11</month>
          <day>3</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>138</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-016-0377-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-016-0377-1</pub-id>
          <pub-id pub-id-type="medline">27809908</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-016-0377-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5093937</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is All you Need</article-title>
          <source>Proceedings of the Advances in Neural Information Processing Systems 30</source>
          <year>2017</year>
          <conf-name>NIPS'17</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA, USA</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Curran Associates, Inc</publisher-name>
          <fpage>5998</fpage>
          <lpage>6008</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf">https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2018</year>
          <access-date>2020-01-07</access-date>
          <comment>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1810.04805">http://arxiv.org/abs/1810.04805</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carbonell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
          </person-group>
          <source>arXiv e-Print archive</source>
          <year>2019</year>
          <access-date>2020-01-07</access-date>
          <comment>XLNet: Generalized Autoregressive Pretraining for Language Understanding <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1906.08237">http://arxiv.org/abs/1906.08237</ext-link></comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
