<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i7e16008</article-id>
      <article-id pub-id-type="pmid">32706678</article-id>
      <article-id pub-id-type="doi">10.2196/16008</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Prediction of Medical Concepts in Electronic Health Records: Similar Patient Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gupta</surname>
            <given-names>Amarnath</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jain</surname>
            <given-names>Felipe</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Le</surname>
            <given-names>Nhat</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science &#38; Engineering</institution>
            <institution>University of California, Riverside</institution>
            <addr-line>Winston Chung Hall 363</addr-line>
            <addr-line>900 University Ave.</addr-line>
            <addr-line>Riverside, CA, 92521</addr-line>
            <country>United States</country>
            <phone>1 9518275639</phone>
            <email>nle020@ucr.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9406-8716</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Wiley</surname>
            <given-names>Matthew</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9441-3786</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Loza</surname>
            <given-names>Antonio</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7136-1351</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Hristidis</surname>
            <given-names>Vagelis</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8679-4988</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>El-Kareh</surname>
            <given-names>Robert</given-names>
          </name>
          <degrees>MD, MS, MPH</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3158-5681</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science &#38; Engineering</institution>
        <institution>University of California, Riverside</institution>
        <addr-line>Riverside, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Medicine</institution>
        <institution>University of California, Riverside</institution>
        <addr-line>Riverside, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Medicine</institution>
        <institution>University of California, San Diego</institution>
        <addr-line>San Diego, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Nhat Le <email>nle020@ucr.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>17</day>
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>7</issue>
      <elocation-id>e16008</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>8</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>21</day>
          <month>10</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>1</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>3</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Nhat Le, Matthew Wiley, Antonio Loza, Vagelis Hristidis, Robert El-Kareh. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 17.07.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2020/7/e16008" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Medicine 2.0—the adoption of Web 2.0 technologies such as social networks in health care—creates the need for apps that can find other patients with similar experiences and health conditions based on a patient’s electronic health record (EHR). Concurrently, there is an increasing number of longitudinal EHR data sets with rich information, which are essential to fulfill this need.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to evaluate the hypothesis that we can leverage similar EHRs to predict possible future medical concepts (eg, disorders) from a patient’s EHR.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We represented patients’ EHRs using time-based prefixes and suffixes, where each prefix or suffix is a set of medical concepts from a medical ontology. We compared the prefixes of other patients in the collection with the state of the current patient using various interpatient distance measures. The set of similar prefixes yields a set of suffixes, which we used to determine probable future concepts for the current patient’s EHR.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We evaluated our methods on the Multiparameter Intelligent Monitoring in Intensive Care II data set of patients, where we achieved precision up to 56.1% and recall up to 69.5%. For a limited set of clinically interesting concepts, specifically a set of procedures, we found that 86.9% (353/406) of the true-positives are clinically useful, that is, these procedures were actually performed later on the patient, and only 4.7% (19/406) of true-positives were completely irrelevant.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>These initial results indicate that predicting patients’ future medical concepts is feasible. Effectively predicting medical concepts can have several applications, such as managing resources in a hospital.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>consumer health information</kwd>
        <kwd>decision support techniques</kwd>
        <kwd>electronic health record</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Medicine 2.0—the intersection of Web 2.0 and health care services, apps, and tools—brings new opportunities for patients to actively contribute to their own care [<xref ref-type="bibr" rid="ref1">1</xref>]. With the rapid adoption of patients’ electronic health records (EHRs) [<xref ref-type="bibr" rid="ref2">2</xref>], allowing users to find patients with similar experiences and health conditions based on their EHR has the potential to improve the quality of care and expand options for health care solutions [<xref ref-type="bibr" rid="ref3">3</xref>]. This approach may lead to novel apps for patients, such as self-management recommendations based on big data aggregation across cohorts [<xref ref-type="bibr" rid="ref4">4</xref>]. Apps that allow patients to find, discuss, and share health data and information can improve patient outcomes while raising meaningful discussions in disease management [<xref ref-type="bibr" rid="ref5">5</xref>]. Therefore, finding patients with similar experiences and health conditions is a critical step for patients to contribute to their own care. This capability is becoming more important as more patient records become available (with user consent and commonly anonymized), for instance, through health social networks that aim to connect patients, which drive the need for patient-centered health informatics [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>We evaluated the <italic>hypothesis</italic> that we can predict possible future medical concepts in a patient’s EHR by leveraging the EHRs of other patients in the collection. Medical concepts are entities of a medical ontology, which is a knowledge network of medical concepts, where concepts and their definitions are categorized and interconnected (normally via a hierarchy) to present their semantic meanings. Given a point in time, a patient’s current medical history is stored in the form of EHRs. Future medical concepts are defined as the ones appearing in the patient’s EHRs after that point, which is also the patient’s future medical record. To evaluate our hypothesis, we first organized each patient’s EHR in the database as a list of chronological medical events, which can be divided into a prefix (a sequence of events up to a time moment) and a suffix (a sequence of events that happened after this time moment). Then, we used various interpatient similarity measures to locate other patients’ EHRs that have prefixes similar to the current patient’s EHR. Finally, we processed the time-based suffixes of the matched EHRs to determine which medical concepts are probable for the future of the current patient’s EHR. In short, our method uses EHRs of patients with similar past medical developments to predict a patient’s upcoming developments.</p>
        <p>Furthermore, our method offers the prediction’s explanation by providing similar patients and medical concepts influencing the prediction; thus, it does not suffer the interpretability limitation of common deep learning techniques [<xref ref-type="bibr" rid="ref8">8</xref>]. Although we used the Multiparameter Intelligent Monitoring in Intensive Care (MIMIC) II database to evaluate our methods, our methods are applicable to any database of EHRs, where a set of medical concepts can be extracted for various time instances (eg, hospital visits) during a patient’s care.</p>
        <p>Patients are not the only stakeholders who stand to benefit from the prediction of future medical concepts in an EHR; clinicians and clinical researchers can also benefit from a what-if analysis based on similar patients. For example, when a physician is answering questions for a patient or the patient’s family, such an analysis may be helpful as supporting evidence, especially to provide data-driven guidance in the absence of a specific gold standard [<xref ref-type="bibr" rid="ref7">7</xref>]. Moreover, the clinician may view the changes in the probable future EHR of a patient if a specific therapy is undertaken. From a research standpoint, clinical researchers may be interested in finding patients with similar predicted concepts when performing nonrandomized studies, for example, for matching cases and controls.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>Research related to our study is divided into 2 groups: those that consider (1) interpatient similarity measures and (2) analysis and prediction via aggregated patient data. The former group is related in that patients with similar experiences and health conditions were used for predicting future medical concepts. The latter group is related in that an aggregate of patient data across a database of EHRs was used for predicting future medical concepts. However, none of the related studies have defined the notion of EHR prefixes and EHR suffixes when aggregating patient data or finding patients with similar experiences and health conditions.</p>
      </sec>
      <sec>
        <title>Interpatient Similarity Measures</title>
        <p>When measuring patients with similar experiences and health conditions, we leveraged previous papers, which have studied several interpatient distance functions. Methods include case-based reasoning, vector space models, bag-of-concepts (BoCs), information content, path length between concepts, common ancestors of concepts, and combinations of these. None of these methods have been applied to EHR prefixes and EHR suffixes for predicting future medical concepts. Thus, the intuitive question is, “Are these interpatient similarity measures powerful enough to identify patients with similar histories and futures?”</p>
        <p>Cao et al [<xref ref-type="bibr" rid="ref9">9</xref>] used case-based reasoning to find patients with similar experiences and health conditions based on clinical text. They found that medical concepts are superior features compared with a bag-of-words approach. Similar to this study, the authors restricted medical concepts to a specific subset of semantic types, but the authors did not consider semantic similarity between concepts—for example, 2 concepts may be neighbors in the Systematized Nomenclature of Medicine Clinical Terms (SNOMED-CT) ontology—when comparing patients. Mabotuwana et al [<xref ref-type="bibr" rid="ref10">10</xref>] studied an ontology-based similarity measure for radiology reports where the authors extended cosine similarity to include the semantic similarity of medical concepts mentioned in radiology reports. The authors found that the addition of semantic similarity allows a vector space model to differentiate between radiology reports of different anatomical and image procedure–based classes. Plaza and Diaz [<xref ref-type="bibr" rid="ref11">11</xref>] studied concept graphs for measuring interpatient similarity. Given a set of concepts for a patient, all ancestors of each concept are retrieved and assigned a weight based on their depth, where deeper concepts have higher weights. This method is examined in this study and explained in greater detail in the Methods section. Melton et al [<xref ref-type="bibr" rid="ref12">12</xref>] studied a variety of interpatient distance measures, including BoCs and average path length (APL). Both the BoCs and unweighted APLs are investigated and described in greater detail in the Methods section.</p>
      </sec>
      <sec>
        <title>Analysis and Prediction of Aggregated Patient Data</title>
        <p>Related work on aggregating patient data for analytics employs a patient database to provide recommendations, analysis, and/or predictions. Gotz et al at IBM Corporation [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>] developed an interactive system to aid domain experts in retrospective patient cohort analysis. Similar to our study, their system finds a cohort of patients with similar health conditions based on the EHR of the physician’s current patient via symptoms. Statistics for the cohort are aggregated and visualized using a variety of techniques, including an outflow graph that models the evolution of symptoms over time and the respective outcomes. Unlike this study, their system does not predict future medical concepts, nor do they use ontologies when measuring patients with similar health conditions. However, their study complements our study in that the user can use predicted symptoms to explore possible outcomes in the outflow graph.</p>
        <p>PatientsLikeMe has also examined the effects of aggregating patient data [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. A web-based survey found that users reported several benefits from having access to aggregated patient statistics. Furthermore, they found a correlation between perceived benefit and the number of website features used by a user, along with demographic similarities between the users of the web-based platform and actual patient populations. This study aimed to complement the data created by PatientsLikeMe by employing aggregated data to predict future medical concepts.</p>
        <p>Recent advancements in deep learning offer a new, powerful predictive tool for patients’ EHRs [<xref ref-type="bibr" rid="ref17">17</xref>]. Miotto et al [<xref ref-type="bibr" rid="ref18">18</xref>] proposed a 3-layered stack of denoising autoencoders to learn a vector representation of each patient from an EHR database of approximately 700,000 patients and then used this <italic>deep patient</italic> embedding to predict the probability of patients developing 78 diseases. Studies by Razavian et al, Lipton et al, Choi et al, and Nguyen et al [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>] explored the temporal order of medical events and different neural network architectures, such as recurrent convolutional networks. Rajkomar et al [<xref ref-type="bibr" rid="ref23">23</xref>] represented a patient’s entire EHR as a temporal sequence of medical events in the Fast Healthcare Interoperability Resources (FHIR) format and applied various deep learning models to learn the patient’s representation for further predictions: inpatient mortality, 30-day unplanned readmission, long length of stay, and 14,025 International Classification of Diseases-9th revision, diagnosis codes. In general, these methods learn the patient’s vector representation, which is used to model downstream prediction tasks such as classification or regression problems. Although these studies restrict their predictions to a predefined medical concept set, our study makes predictions of any medical concepts appearing in patients with similar health conditions. Moreover, whereas deep learning approaches offer limited interpretability [<xref ref-type="bibr" rid="ref8">8</xref>], our method explains how a prediction is made.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>We represented each patient as a set of medical concepts from SNOMED-CT [<xref ref-type="bibr" rid="ref24">24</xref>]. We extracted medical concepts using the MetaMap library [<xref ref-type="bibr" rid="ref25">25</xref>]. Then, to identify patients with similar health conditions, we adopted various distance functions studied in the literature [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. We showed how to extend these distance functions to predict future medical concepts, given a query patient. We demonstrated and evaluated these methods on the MIMIC II clinical database, which contains patient data from visits to an intensive care unit (ICU) [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
      <sec>
        <title>Framework and Method for Predicting Future Concepts Using Similar Patients</title>
        <p>First, we proposed our framework for discretizing EHRs into events, yielding the notion of <italic>EHR prefixes</italic> and <italic>EHR suffixes</italic>. Consider a database of patient visits to an ICU. One possible method to discretize these visits is to exploit transfers between wards within the ICU, as illustrated by the example in <xref rid="figure1" ref-type="fig">Figure 1</xref>. In this example, the patient is admitted to the medical ICU, transferred to the surgical ICU, and then transferred back to the medical ICU. The patient’s time in each ward represents a distinct <italic>event</italic>, where clinical notes are recorded that report the patient’s status; thus, medical concepts reported in each ward are associated with a specific event. Furthermore, these events have a natural ordering, which produces the notion of EHR prefixes and EHR suffixes. In this example, there are 2 possible EHR prefixes, <italic>[Event1]</italic> and <italic>[Event1, Event2]</italic>, and 2 possible EHR suffixes, <italic>[Event2, Event3]</italic> and <italic>[Event3]</italic>. Hence, each EHR prefix and EHR suffix is associated with a set of medical concepts, as shown at the bottom of <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <p>The motivation for discretizing EHRs into events is that health care changes over time with respect to medical conditions, procedures, findings, and drugs observed from the past. Given a new patient, our goal is to find similar EHR prefixes from the EHR database such that the respective EHR suffixes will predict the new patient’s future. Let the new patient’s EHR be denoted by <italic>Q</italic>, where <italic>Q</italic> is represented as a set of medical concepts defined on an ontology. Let <italic>Q</italic><sup>p</sup><sub>k</sub> represent the set of medical concepts obtained from the first <italic>k</italic> events, where the superscript <italic>p</italic> denotes that this set is an EHR prefix. The corresponding EHR suffix is denoted by <italic>Q<sup>S</sup><sub>k</sub></italic><sub>+1</sub>, which represents the set of medical concepts from event <italic>k+1</italic> to the last event in the EHR. Note that in a clinical setting, we would use the whole EHR as <italic>Q</italic><sup>p</sup><sub>k</sub> as the goal is to predict future concepts, given the current state of the patient. Finally, let <italic>D</italic> be the database of records within the EHR. We now define our concept prediction algorithm that consists of 2 steps: (1) finding similar records and (2) returning concepts with high confidence.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>An example of a patient visiting the intensive care unit, discretized by ward transfers. In this example, the patient was admitted to the medical intensive care unit, transported to radiology, and transferred to the surgical intensive care unit. As this example contains 3 events, there are 2 possible electronic health record prefixes and 2 possible electronic health record suffixes. ICU: intensive care unit; NICU: neonatal intensive care unit.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e16008_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Concept Prediction Algorithm</title>
          <sec>
            <title>Step 1: Compute Similar Electronic Health Record Prefixes</title>
            <p>In particular, find the set <italic>S</italic> of EHR suffixes that correspond to the EHR prefixes <italic>P<sub>i</sub></italic> in <italic>D</italic> whose dissimilarity with respect to <italic>Q<sup>p</sup><sub>k</sub></italic> is less than some <italic>dissimilarity threshold τ</italic>:<inline-graphic xlink:href="medinform_v8i7e16008_fig5.png" xlink:type="simple" mimetype="image"/> where <italic>P<sub>i</sub></italic> is an EHR prefix of events from a single visit, <italic>S<sub>i</sub></italic> is the corresponding EHR suffix, and <italic>DisSim</italic> is an interpatient dissimilarity function. Note that we only considered the most similar EHR prefix for each visit.</p>
          </sec>
          <sec>
            <title>Step 2: Return Concepts With High Confidence</title>
            <p>Let <inline-graphic xlink:href="medinform_v8i7e16008_fig6.png" xlink:type="simple" mimetype="image"/> be the confidence of concept <italic>c</italic>, where <italic>S'<sub>c</sub></italic> is the set of EHR suffixes from <italic>S</italic> that contain <italic>c</italic>. We return <inline-graphic xlink:href="medinform_v8i7e16008_fig7.png" xlink:type="simple" mimetype="image"/>, which is the set of concepts in <italic>S</italic> with confidence greater than the <italic>confidence threshold λ</italic>.</p>
            <p><xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates step 1 of the concept prediction algorithm, where only prefixes <italic>P<sub>2</sub></italic> and <italic>P<sub>5</sub></italic> have dissimilarities from the query prefix <italic>p</italic> (or with respect to <italic>Q</italic><sup>p</sup><sub>k</sub>) smaller than the threshold τ; thus, their corresponding suffixes <italic>S<sub>2</sub></italic> and <italic>S</italic><sub>5</sub> are included in <italic>S</italic>. Define <inline-graphic xlink:href="medinform_v8i7e16008_fig8.png" xlink:type="simple" mimetype="image"/>. Furthermore, let <italic>P</italic><sub>5</sub> and <italic>S</italic><sub>5</sub> be EHR prefix B and EHR suffix B from <xref rid="figure1" ref-type="fig">Figure 1</xref>. Thus, <graphic xlink:href="medinform_v8i7e16008_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>. Let λ=0.7, then step 2 of the algorithm returns <italic>C</italic>={<italic>Intubated, Seizure</italic>}.</p>
            <fig id="figure2" position="float">
              <label>Figure 2</label>
              <caption>
                <p>Dissimilarities of electronic health record prefixes with respect to the <italic>k</italic>-events prefix of a patient <italic>Q</italic>, denoted by <italic>Q</italic><sup>p</sup><sub>k</sub>.</p>
              </caption>
              <graphic xlink:href="medinform_v8i7e16008_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
            <p>Hence, we can evaluate both parameters (τ and λ) and <italic>DisSim</italic> using traditional measures of specificity, sensitivity, and precision. Let <italic>U</italic> be the universe of all medical concepts. True-positives (TPs), true-negatives (TNs), false-positives (FPs), and false-negatives (FNs) are defined by:</p>
            <disp-formula>
              <graphic xlink:href="medinform_v8i7e16008_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
            <p>We have also extended our definitions of TP, TN, FP, and FN to consider <italic>fresh concepts</italic> only. Fresh concepts are concepts that appear in the query EHR suffix, <inline-graphic xlink:href="medinform_v8i7e16008_fig11.png" xlink:type="simple" mimetype="image"/>, which do not appear in the query EHR prefix, <inline-graphic xlink:href="medinform_v8i7e16008_fig12.png" xlink:type="simple" mimetype="image"/>. We argue that fresh concepts are more challenging and have a higher potential to be clinically useful for prediction. We analyzed fresh concepts separately from all concepts as concepts that appear in the query EHR prefix are likely to persist into the suffix and thus would skew our evaluation of fresh concepts. Therefore, we ignore concepts that appear in <inline-graphic xlink:href="medinform_v8i7e16008_fig13.png" xlink:type="simple" mimetype="image"/> when evaluating any measures concerning TP, TN, FP, or FN.</p>
            <p><xref rid="figure3" ref-type="fig">Figure 3</xref> illustrates the connection between the entire set of concepts <italic>U</italic>, the predicted set of concepts <italic>C</italic>, and the ground truth <inline-graphic xlink:href="medinform_v8i7e16008_fig14.png" xlink:type="simple" mimetype="image"/>. In our experiments, the size of <inline-graphic xlink:href="medinform_v8i7e16008_fig15.png" xlink:type="simple" mimetype="image"/>, and thus, the number of TNs skews the value of specificity. Therefore, we assessed the parameters and interpatient distance measures using the harmonic mean of sensitivity and precision, commonly known as the F-measure in information retrieval.</p>
            <fig id="figure3" position="float">
              <label>Figure 3</label>
              <caption>
                <p>The connection between the ground truth concepts and the predicted concept space.</p>
              </caption>
              <graphic xlink:href="medinform_v8i7e16008_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Interpatient Distance Measures</title>
        <p>We evaluated 4 interpatient dissimilarity measures proposed in the literature [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]: (1) <italic>BoC</italic>, (2) <italic>CAs</italic>, (3) <italic>APL</italic>, and (4) symmetric APL (<italic>APL_SYM</italic>).</p>
        <p>Let <italic>A</italic> and <italic>B</italic> be the sets of medical concepts.</p>
        <p>For BoC, the dissimilarity between <italic>A</italic> and <italic>B</italic> is defined as the sum of the number of concepts that appear in A but not in B and in B but not in A, divided by the size of their union [<xref ref-type="bibr" rid="ref12">12</xref>]. The union of <italic>A</italic> and <italic>B</italic> is also a set, and therefore, the size of the union only considers each concept once:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v8i7e16008_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>BoC produces values between 0 and 1, where 0 represents maximum similarity, and 1 represents minimum similarity. Note that BoC is symmetric; hence, <italic>BoC</italic>(<italic>A, B</italic>)=<italic>BoC</italic>(<italic>B, A</italic>).</p>
        <p>In CA, for each concept c<sub>a</sub> in A, we retrieved all ancestor concepts in the concept hierarchy and assigned to each concept and its ancestors a weight, where each c<sub>a</sub> is assigned a weight of 1, and ancestors of each c<sub>a</sub> are assigned a weight relative to their distance from c<sub>a</sub>. An analogous weighting procedure is applied to all concepts and their ancestors in <italic>B</italic>. Weights are averaged if a node is assigned more than one weight.</p>
        <p>Let <italic>A'</italic> and <italic>B'</italic> be the sets of concepts and their ancestors for <italic>A</italic> and <italic>B</italic>, respectively. When computing the dissimilarity from <italic>A</italic> to <italic>B</italic>, we examined each concept in <italic>A'</italic> and checked whether it exists in <italic>B'</italic>. If it exists, the given concept in <italic>A'</italic> is assigned a value equal to its own weight, and zero otherwise [<xref ref-type="bibr" rid="ref4">4</xref>]:
        <disp-formula><graphic xlink:href="medinform_v8i7e16008_fig17.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/></disp-formula>
        where <italic>w (c<sub>i</sub>)</italic> is the weight assigned to the concept <italic>c<sub>i</sub></italic>. Hence, the abovementioned sum measures the overlap between the concepts and the ancestors of <italic>A</italic> and <italic>B</italic>. Scores from CA range from 0 to 1, where a score of 0 represents maximum similarity, and 1 represents minimum similarity. By definition, CA is not symmetric.</p>
        <p>The APL measure finds the minimum number of edges between each concept in A and every concept in B. APL sums the distances across all concepts in A to obtain the dissimilarity of <italic>A</italic> to <italic>B</italic> [<xref ref-type="bibr" rid="ref5">5</xref>]:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v8i7e16008_fig18.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>A score of 0 implies maximum similarity. By definition, APL is not symmetric; APL_SYM is the sum of the APL from <italic>A</italic> to <italic>B</italic> and the APL from <italic>B</italic> to <italic>A</italic>:</p>
        <disp-formula>
          <inline-graphic xlink:href="medinform_v8i7e16008_fig19.png" xlink:type="simple" mimetype="image"/>
        </disp-formula>
      </sec>
      <sec>
        <title>Preparation of Multiparameter Intelligent Monitoring in Intensive Care II Data Set</title>
        <p>We applied our framework and the aforementioned interpatient dissimilarity measures to the MIMIC II clinical database—a database of EHRs collected over a 7-year period from multiple ICUs at a medical center in Boston [<xref ref-type="bibr" rid="ref26">26</xref>]. Several types of clinical notes are recorded during a visit, including radiology reports, nursing notes, and physician notes. We parsed each note to extract medical concepts from the clinical text. Each note is associated with a timestamp that represents its creation time. We used these timestamps to map notes to events, defined as ward transfers, generating a list of concepts for each event.</p>
        <p>First, we parsed medical concepts from each type of note using the MetaMap library [<xref ref-type="bibr" rid="ref25">25</xref>]. Before parsing each note, abbreviations such as <italic>OMG</italic> were identified and expanded using an abbreviation list similar to the list of Wiley et al [<xref ref-type="bibr" rid="ref27">27</xref>]. The MetaMap library maps free text to biomedical concepts defined in the Unified Medical Language System (UMLS) [<xref ref-type="bibr" rid="ref28">28</xref>]. Each concept in the UMLS corresponds to one or more semantic types [<xref ref-type="bibr" rid="ref29">29</xref>], which further maps to semantic groups [<xref ref-type="bibr" rid="ref30">30</xref>]. Previous studies have shown that disorders, physiology, chemicals and drugs, procedures, and anatomy are the most important UMLS semantic groups when measuring interpatient similarity [<xref ref-type="bibr" rid="ref11">11</xref>]. Negated concepts are identified via MetaMap, and these concepts are ignored, as previous work has shown that absent concepts are not relevant to patient similarity [<xref ref-type="bibr" rid="ref11">11</xref>]. After obtaining a list of relevant concepts, each concept from the UMLS is converted to a concept from SNOMED-CT using the MRCONSO table [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        <p>A single patient visit may consist of several transfers between wards. Each of these transfers is considered to be a <italic>census event</italic> in the MIMIC II database. The rationale for this definition of an event is that each time a patient enters a new care unit, there may be a significant change in the patient’s status, for example, the patient’s condition worsened, and he was transferred to the surgical ICU.</p>
        <p>If a patient visits a hospital multiple times, each visit is treated independently, that is, multiple visits are viewed as different patients for the purpose of our similarity matching algorithm. This decision is not critical for the MIMIC II data set because a majority of patients only have one visit. Related work has shown that the abovementioned concept of census events provides an effective timeline of a patient’s record, where concepts within an event are semantically associated with each other [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
      </sec>
      <sec>
        <title>Computation Time Analysis</title>
        <p>The computation cost to extract ancestors is linear with respect to the number of ancestors. As the ontology is a wide directed acyclic graph (DAG) instead of a deep one, each concept has up to 61 ancestors, and 29 ancestors on average. We used Dewey encoding to speed up both the retrieval of ancestors and calculation of concept distance. In particular, a concept’s Dewey encoding encapsulates its ancestor information, for example, if concept <italic>C2315591</italic> is encoded as <italic>$.8.96.45</italic>, this implies that the concept’s ancestors are <italic>$.8</italic> and <italic>$.8.96</italic>. Using Dewey encodings [<xref ref-type="bibr" rid="ref33">33</xref>], computing the distance between 2 concepts is reduced to a string comparison between their encodings; that is, we computed the distance between the concepts and their lowest common ancestor, which again has a cost linear in the DAG depth.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Anecdotal Example</title>
        <p>We started with a real anonymized example from the MIMIC II data set to demonstrate the potential utility of our approach. Bob was involved in a motor vehicle collision where he struck his head and lost consciousness. He arrived at the medical ICU with a chief complaint of severe shoulder pain and bleeding from his nostrils. After arriving at the medical ICU (event 1), Bob was transferred to the surgical ICU for further care (event 2). During his stay in the surgical ICU, the staff observed symptoms of pneumonia and pulmonary aspiration. Bob was then transported to radiology (event 3), where tests revealed that Bob indeed had both pneumonia and pulmonary aspiration. We executed our prediction method using event 1 as a query. In particular, we used <italic>CA</italic>, with τ=0.5 and λ=0.3. Of the suffixes of patients with similar EHR prefixes, 50% contain the concepts of pneumonia and pulmonary aspiration, whereas 29% and 23% of all patients in the general ICU population contained the concepts of pneumonia and pulmonary aspiration, respectively.</p>
      </sec>
      <sec>
        <title>Event-Based Analysis of the Multiparameter Intelligent Monitoring in Intensive Care II Data Set</title>
        <p>We only considered visits with more than one event because visits with 1 event cannot be split into EHR prefixes and EHR suffixes. In total, there are 4083 visits over 3971 unique patients; thus, patients with multiple visits account for less than 3% of the total number of visits. Visits with 2 events dominate the data set, accounting for 80% of the total visits, whereas visits with 3 events accounted for 15% of the total visits. In general, a longer visit produces more medical concepts, implying that new concepts are found as the patient’s visit progresses. Visits of length 2, 3, and 4, respectively, have 291, 434, and 539 unique medical concepts on average. The corresponding number for visits of more than 4 events is 725. On average, each event contains 187 medical concepts, and each visit contains 325 medical concepts. Furthermore, these concepts are dominated by disorders (36%) and procedures (22%). The other concept semantic groups are anatomy (20%), drugs (12%), and physiology (10%).</p>
      </sec>
      <sec>
        <title>Prediction Results</title>
        <p>We evaluated the interpatient distance measures BoC, CA, APL, and APL_SYM on the aforementioned admissions of the MIMIC II database using our framework of EHR prefixes and EHR suffixes. Our first objective was to tune the parameters τ and λ using the <italic>F</italic> measure. We split the admissions into training and testing data sets, where 20% of the admissions were used for training, and 80% of the admissions were used for testing. <xref ref-type="table" rid="table1">Table 1</xref> reports the combination of τ and λ that produced the highest <italic>F</italic> measure for each interpatient distance measure using the training data set. APL_SYM obtains the highest <italic>F</italic> measure, precision, and sensitivity, whereas APL obtains the highest specificity.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The best parameters for each distance function based on the training data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="80"/>
            <col width="110"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>
                  <italic>DisSim</italic>
                </td>
                <td>τ</td>
                <td>λ</td>
                <td><italic>F</italic> measure (%)</td>
                <td>Specificity (%)</td>
                <td>Sensitivity (%)</td>
                <td>Precision (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Bag-of-concept</td>
                <td>0.7</td>
                <td>0.08</td>
                <td>51.8</td>
                <td>86.9</td>
                <td>52.9</td>
                <td>50.6</td>
              </tr>
              <tr valign="top">
                <td>Common ancestor</td>
                <td>0.46</td>
                <td>0.25</td>
                <td>48.9</td>
                <td>94.0</td>
                <td>55.2</td>
                <td>43.9</td>
              </tr>
              <tr valign="top">
                <td>Average path length</td>
                <td>1.5</td>
                <td>0.30</td>
                <td>48.7</td>
                <td>94.9</td>
                <td>52.6</td>
                <td>45.4</td>
              </tr>
              <tr valign="top">
                <td>Symmetric average path length</td>
                <td>1.86</td>
                <td>0.07</td>
                <td>52.4</td>
                <td>84.4</td>
                <td>52.9</td>
                <td>52.0</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref> illustrates a graphical representation of the optimal parameters reported in <xref ref-type="table" rid="table1">Table 1</xref>, plotting λ on the y-axis and 1−τ on the x-axis. Thus, all concepts from the EHR suffixes of similar EHR prefixes are included with a score to the right of the corresponding vertical dashed line, and from these concepts, all concepts with a confidence above the corresponding horizontal dashed line are included in the predicted EHR suffix. Furthermore, APL and APL_SYM have been normalized by the maximum possible similarity score, where the maximum similarity score is defined as the maximum path length in SNOMED-CT. As shown in this figure, CA and BoC have larger values of dissimilarity compared with APL and APL_SYM. The tightest bounds for both thresholds are for APL and APL_SYM, and the loosest bound is for BoC. This is expected, as the average scores for BoC, CA, APL, and APL_SYM are 0.86, 0.31, 0.07, and 0.07, respectively. Moreover, APL and CA have the tightest bounds on the confidence threshold; this is an interesting point, as APL and CA are antisymmetric, implying that <italic>symmetric interpatient distance measures require less confidence when predicting future medical concepts</italic>.</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> reports the results on the testing data set using the optimal set of parameters reported in <xref ref-type="table" rid="table1">Table 1</xref> for fresh and not fresh concepts.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Representation of the optimal choice of the dissimilarity threshold τ and confidence threshold λ for the training data set. APL: average path length; APL_SYM: symmetric average path length; BoC: bag-of-concept; CA: common ancestor.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e16008_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The results for the testing data set separated by semantic group, using the parameters tuned on the training data set for fresh and not fresh concepts.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="400"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="160"/>
            <col width="0"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Semantic group and <italic>DisSim</italic></td>
                <td colspan="2"><italic>F</italic> measure (%)</td>
                <td colspan="2">Specificity (%)</td>
                <td colspan="2">Sensitivity (%)</td>
                <td>Precision (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="9">
                  <bold>All concepts</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">BoC<sup>a</sup></td>
                <td colspan="2">51.7</td>
                <td colspan="2">87.1</td>
                <td>52.6</td>
                <td>50.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CA<sup>b</sup></td>
                <td colspan="2">48.9</td>
                <td colspan="2">94.1</td>
                <td>55.7</td>
                <td>43.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL<sup>c</sup></td>
                <td colspan="2">48.7</td>
                <td colspan="2">94.8</td>
                <td>52.8</td>
                <td>45.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL_SYM<sup>d</sup></td>
                <td colspan="2">
                  <italic>52.3</italic>
                  <sup>e</sup>
                </td>
                <td colspan="2">84.2</td>
                <td>53.7</td>
                <td>50.9</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Disorders</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">BoC</td>
                <td colspan="2">49.4</td>
                <td colspan="2">87.8</td>
                <td>49.8</td>
                <td>49.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CA</td>
                <td colspan="2">44.7</td>
                <td colspan="2">95.0</td>
                <td>48.4</td>
                <td>41.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL</td>
                <td colspan="2">44.4</td>
                <td colspan="2">95.7</td>
                <td>46.1</td>
                <td>42.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL_SYM</td>
                <td colspan="2">
                  <italic>50.7</italic>
                </td>
                <td colspan="2">85.0</td>
                <td>51.8</td>
                <td>49.6</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Procedures</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">BoC</td>
                <td colspan="2">52.2</td>
                <td colspan="2">85.6</td>
                <td>53.3</td>
                <td>51.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CA</td>
                <td colspan="2">48.9</td>
                <td colspan="2">92.6</td>
                <td>57.9</td>
                <td>42.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL</td>
                <td colspan="2">48.0</td>
                <td colspan="2">93.6</td>
                <td>54.0</td>
                <td>43.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL_SYM</td>
                <td colspan="2">
                  <italic>52.6</italic>
                </td>
                <td colspan="2">82.0</td>
                <td>54.4</td>
                <td>51.0</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Chemicals and drugs</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">BoC</td>
                <td colspan="2">49.7</td>
                <td colspan="2">89.8</td>
                <td>49.1</td>
                <td>50.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CA</td>
                <td colspan="2">48.5</td>
                <td colspan="2">96.4</td>
                <td>49.9</td>
                <td>47.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL</td>
                <td colspan="2">48.1</td>
                <td colspan="2">96.9</td>
                <td>47.3</td>
                <td>48.9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL_SYM</td>
                <td colspan="2">
                  <italic>50.2</italic>
                </td>
                <td colspan="2">87.7</td>
                <td>49.8</td>
                <td>50.7</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Physiology</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">BoC</td>
                <td colspan="2">56.6</td>
                <td colspan="2">82.3</td>
                <td>57.1</td>
                <td>56.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CA</td>
                <td colspan="2">57.8</td>
                <td colspan="2">89.5</td>
                <td>69.5</td>
                <td>49.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL</td>
                <td colspan="2">
                  <italic>58.4</italic>
                </td>
                <td colspan="2">90.6</td>
                <td>67.6</td>
                <td>51.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">APL_SYM</td>
                <td colspan="2">56.9</td>
                <td colspan="2">80.1</td>
                <td>57.7</td>
                <td>56.1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>BoC: bag-of-concept.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>CA: common ancestor.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>APL: average path length.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>APL_SYM: symmetric average path length.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>Italicized numbers indicate the best result of the semantic group.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Similarly, <xref ref-type="table" rid="table3">Table 3</xref> reports the same results for fresh concepts only; <italic>fresh concepts are concepts that do not appear in the query EHR prefix and, therefore, are fresh to the query EHR suffix</italic>. We categorized each concept into its semantic group and analyzed each interpatient distance measure with all concepts and concepts restricted to a semantic group; anatomical concepts are omitted in this analysis, as predicting an anatomical site, such as lower back, is not useful in a clinical setting.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>The results for the testing data set separated by semantic group, using the parameters tuned on the training data set for fresh concepts only.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="230"/>
            <col width="200"/>
            <col width="190"/>
            <col width="180"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Semantic group and <italic>DisSim</italic></td>
                <td><italic>F</italic> measure (%)</td>
                <td>Specificity (%)</td>
                <td>Sensitivity (%)</td>
                <td>Precision (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>All concepts</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BoC<sup>a</sup></td>
                <td>43.7</td>
                <td>89.6</td>
                <td>43.3</td>
                <td>44.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CA<sup>b</sup></td>
                <td>34.8</td>
                <td>95.7</td>
                <td>37.7</td>
                <td>32.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL<sup>c</sup></td>
                <td>34.5</td>
                <td>96.5</td>
                <td>35.0</td>
                <td>34.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL_SYM<sup>d</sup></td>
                <td>
                  <italic>44.9</italic>
                  <sup>e</sup>
                </td>
                <td>86.8</td>
                <td>45.3</td>
                <td>44.6</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Disorders</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BoC</td>
                <td>42.1</td>
                <td>90.0</td>
                <td>40.8</td>
                <td>43.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CA</td>
                <td>32.1</td>
                <td>96.3</td>
                <td>32.9</td>
                <td>31.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL</td>
                <td>31.8</td>
                <td>97.0</td>
                <td>30.6</td>
                <td>33.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL_SYM</td>
                <td>
                  <italic>44.1</italic>
                </td>
                <td>87.3</td>
                <td>43.7</td>
                <td>44.5</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Procedures</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BoC</td>
                <td>43.6</td>
                <td>88.5</td>
                <td>42.7</td>
                <td>44.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CA</td>
                <td>35.6</td>
                <td>94.8</td>
                <td>39.3</td>
                <td>32.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL</td>
                <td>34.7</td>
                <td>95.6</td>
                <td>36.4</td>
                <td>33.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL_SYM</td>
                <td>
                  <italic>44.7</italic>
                </td>
                <td>85.0</td>
                <td>44.6</td>
                <td>44.7</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Chemicals and drugs</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BoC</td>
                <td>38.6</td>
                <td>91.8</td>
                <td>34.9</td>
                <td>43.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CA</td>
                <td>30.4</td>
                <td>97.5</td>
                <td>26.6</td>
                <td>35.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL</td>
                <td>28.7</td>
                <td>97.9</td>
                <td>23.7</td>
                <td>36.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL_SYM</td>
                <td>
                  <italic>39.9</italic>
                </td>
                <td>89.7</td>
                <td>36.5</td>
                <td>44.1</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Physiology</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BoC</td>
                <td>46.1</td>
                <td>86.1</td>
                <td>45.2</td>
                <td>47.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CA</td>
                <td>40.9</td>
                <td>92.8</td>
                <td>45.3</td>
                <td>37.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL</td>
                <td>41.2</td>
                <td>93.8</td>
                <td>43.4</td>
                <td>39.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>APL_SYM</td>
                <td>
                  <italic>47.2</italic>
                </td>
                <td>83.9</td>
                <td>46.8</td>
                <td>47.5</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>BoC: bag-of-concept.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>CA: common ancestor.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>APL: average path length.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>APL_SYM: symmetric average path length.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>Italicized numbers indicate the best result of the semantic group.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>As shown in <xref ref-type="table" rid="table2">Table 2</xref>, the symmetric interpatient distance measures outperform the antisymmetric distance measures across all semantic groups, where APL_SYM performs the best; the only exception is physiology. Comparing these results with <xref ref-type="table" rid="table3">Table 3</xref> shows that the gap between symmetric and antisymmetric distance measures widens to a 10% difference in terms of <italic>F</italic> measure. That is, <italic>symmetric interpatient distance measures are more predictive of future medical concepts, especially for fresh concepts</italic>. When considering the symmetric measures APL_SYM and BoC, <italic>APL_SYM consistently performs better</italic>, achieving higher rates of sensitivity and precision in every case.</p>
        <p>Furthermore, the <italic>antisymmetric interpatient distance measures performed better with respect to specificity</italic> but achieved a lower precision. That is, antisymmetric distance measures predicted fewer concepts overall to achieve higher rates of specificity with lower rates of sensitivity and precision, which is explained by the conservative choice made during the tuning phase. Another interesting point is that all interpatient distance measures observed an increase in specificity for fresh concepts; however, this increase was greatest for symmetric interpatient distance measures. The reason is that the number of FP decreases for fresh concepts, whereas the nonfresh concepts are more frequently predicted to be in the suffix and, therefore, have a higher frequency of FPs.</p>
        <sec>
          <title>Clinical Significance of the Subset of Predicted Concepts</title>
          <p>We further examined 16 individual concepts identified as important by our physician author (RE) in the ICU setting. We focused on the TP cases (correctly predicted mention in the suffix) to validate the prediction’s importance and FN cases (incorrectly predicted no mention in the suffix) to detect possible significant misses. We presented our predictions in a web interface (<xref ref-type="table" rid="table4">Table 4</xref>), which is basically a table of predicted concepts, the patient’s EHR prefix/suffix and concepts influencing the prediction in highlight.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Predictions and explanations provided to our medical student and physician authors to label the clinical significance of a prediction.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="90"/>
              <col width="200"/>
              <col width="480"/>
              <col width="230"/>
              <thead>
                <tr valign="top">
                  <td>Patient ID</td>
                  <td>Predicted concept and time</td>
                  <td>Prefix at time of prediction</td>
                  <td>Suffix from time of prediction</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>22,487</td>
                  <td><italic>Bronchoscopy</italic> (3 hours:23 min:0 seconds)</td>
                  <td>...Resp: RR 16-20 has periods of apnea when asleep...<break/>…There is increased density in the right upper lung field with elevation of the minor fissure consistent with developing atelectasis in the right upper lobe…</td>
                  <td>…<italic>Bronchoscopy</italic> done secondary to low PaO2...</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>In <xref ref-type="table" rid="table4">Table 4</xref>, our domain expert is given a prediction, the patient history, and asked to evaluate if the prediction is helpful. Particularly, in the third column (<italic>Prefix at time of prediction</italic>), we presented the patient history up to the point that our system predicts that a concept(s) will appear in future (in the second column <italic>Predicted concept and time</italic>). The last column in <xref ref-type="table" rid="table4">Table 4</xref> (<italic>Suffix from time of prediction</italic>) shows events occurring after the prediction time so that our domain expert can judge if the system’s prediction is significant in the sense that the predicted concepts actually affect the patient and the prediction is not trivial (that is, the concept would obviously occur, and thus there is no need for prediction). As we focused on the positive cases, the predictions actually appear in the patient’s suffix and thus are highlighted for the domain expert to evaluate.</p>
          <p>Our medical student and physician authors manually mark each case with 1 of 4 categories: (1) mentioned and performed; (2) concept mentioned but it is obvious (ie, little value to clinicians); (3) mentioned but only considered by physician, not performed (ie, the clinicians mentioned this concept in the suffix but in the end did not perform the procedure); and (4) mentioned, but out of context (eg, mentioned as part of the medical history of a patient or while describing a similar case). We reported additional metrics such as specificity, sensitivity, FP, and TN of 7 important concepts in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, ordered by concept name. We do not count the cases in which a predicted concept occurs in both the patient’s prefix and suffix. Moreover, if a patient history can be divided into multiple prefix-suffix pairs and the algorithm is able to make predictions for a long prefix, not for the shorter prefix, we do not count the case of a shorter prefix as a negative prediction.</p>
        </sec>
        <sec>
          <title>True-Positive Analysis</title>
          <p><xref ref-type="table" rid="table5">Table 5</xref> reports the fine-grained evaluation of TP cases. Note that we only presented predictions of 7 concepts because our algorithm did not predict the remaining 9 concepts. The bronchoscopy concept was successfully mentioned and performed in the suffix 63 of 63 times in a TP category. Bronchoscopy was positively identified with the keywords in the prefix, usually mentioning respiratory symptoms. Compared with bronchoscopy, surgery is a much more invasive procedure that requires consent of the patient and for the patient to be medically cleared for surgery. This caused 215 surgical concepts to be accurately mentioned and performed but have a significant portion mentioned out of context (16 times) or mentioned but only considered and not performed (25 times). Patients have a craniotomy performed for a variety of reasons. One craniotomy in the medical records analyzed was accurately mentioned and performed, but it was not needed to be predicted. The patient undergoing a craniotomy came in after a motor vehicle collision with an obvious facial fracture, thus not needing to predict the craniotomy, as it would be the only way to treat the patient. In summary, most TP predictions are useful. Overall, 13.1% of the predictions are unhelpful, and mostly fall into the surgery concept.</p>
          <table-wrap position="float" id="table5">
            <label>Table 5</label>
            <caption>
              <p>Expert evaluation of true positive predictions using 4 fine-grained categories.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="200"/>
              <col width="140"/>
              <col width="250"/>
              <col width="250"/>
              <col width="160"/>
              <thead>
                <tr valign="top">
                  <td>Concept</td>
                  <td>Mentioned and performed</td>
                  <td>Concept mentioned, but is obvious</td>
                  <td>Mentioned but only considered by physician, not performed</td>
                  <td>Mentioned, but out of context</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Bronchoscopy</td>
                  <td>63</td>
                  <td>0</td>
                  <td>0</td>
                  <td>0</td>
                </tr>
                <tr valign="top">
                  <td>Cardiac surgery</td>
                  <td>5</td>
                  <td>0</td>
                  <td>0</td>
                  <td>0</td>
                </tr>
                <tr valign="top">
                  <td>Colonoscopy</td>
                  <td>1</td>
                  <td>0</td>
                  <td>0</td>
                  <td>0</td>
                </tr>
                <tr valign="top">
                  <td>Craniotomy</td>
                  <td>9</td>
                  <td>1</td>
                  <td>1</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>Dialysis procedure</td>
                  <td>47</td>
                  <td>0</td>
                  <td>6</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>Refractive surgery enhancement</td>
                  <td>13</td>
                  <td>0</td>
                  <td>0</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>Surgery</td>
                  <td>215</td>
                  <td>1</td>
                  <td>25</td>
                  <td>16</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>We illustrated how our algorithm offers useful predictions using a TP case example. In patient ID 22,487, a bronchoscopy was successfully predicted in the suffix (<xref ref-type="table" rid="table4">Table 4</xref>). The patient had a history of coronary artery disease with chest pain and had a triple coronary artery bypass graft performed to alleviate his symptoms before the prefix. In the prefix, our algorithm highlighted (we <italic>highlighted</italic> a concept in the prefix if it is contributing to the prediction of the target concept in the suffix) <italic>effusion</italic> 7 times, <italic>apnea</italic> 6 times, and <italic>increased density</italic> one time, all related to pulmonary pathology. Heparin, a blood thinner, was also highlighted 7 times by our algorithm. The patient’s respiratory state began to diminish, and he was eventually placed on a ventilator, as his course in the hospital progressed. Bronchoscopy was accurately predicted and performed on day 3 and hour 23 in the suffix <italic>secondary to low PaO<sub>2</sub></italic> with small amounts of suctioned thin secretions, and no plugs were found. The accurately predicted concept is interesting, as the patient was initially presented with chest pain–related symptoms treated by intervention through the cardiovascular organ system but was found to have concurrent complications in the pulmonary organ system.</p>
          <p>To obtain the full picture, we presented a TP example that is clinically incorrect. In patient 9122, a surgery was predicted in the suffix, but no performance of a surgery in the suffix was found. This patient was a 25-week premature twin baby born by cesarean section. The only mention of surgery in the suffix is an update by a neonatal intensive care unit nurse stating they were <italic>awaiting surgical time for twin</italic>. No surgery was considered or performed for this patient during the suffix, and the patient was only being medically managed for being born prematurely. One of the most highlighted words in the prefix used by the algorithm to predict surgery was <italic>bili</italic> with 35 mentions, <italic>bilirubin</italic> had 3 mentions, and <italic>phototherapy</italic> with 20 mentions—all related to jaundice. There were also multiple highlighted words related to respiratory symptoms, such as <italic>gas</italic> with 18 mentions, <italic>bicarb</italic> having 9 mentions, and 3 mentions for <italic>PCO<sub>2</sub></italic>. Although no surgery plan was considered for the patient, the word surgery was present in the suffix, that is, this is an <italic>out of context</italic> prediction.</p>
          <p>In <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>, we examined how early our algorithm can predict concept occurrences. In particular, in TP cases, we calculated the time from the prefix’s end to the suffix’s beginning. For most concepts, the minimum times are almost 0 because there are suffixes that occur right after their prefixes. On average, our algorithm can predict concepts several days before their actual occurrences.</p>
        </sec>
        <sec>
          <title>False-Negative Analysis</title>
          <p>We presented the same evaluation on FN cases in <xref ref-type="table" rid="table6">Table 6</xref>. Although 49 bronchoscopies were accurately mentioned and performed, the FN had an additional concept mentioned out of context (1 time) or mentioned but only considered and not performed (3 times). Colonoscopy appeared more in the FN group with 26 colonoscopies mentioned and performed but had a high quantity of concepts mentioned out of context (5) or mentioned but only considered and not performed (13). The surgery group also mentioned and performed 154 concepts; however, similar to <xref ref-type="table" rid="table5">Table 5</xref>, it has a significant number of predictions made out of context (9) or mentioned but only considered and not performed (43). The refractive surgery enhancement concept had the lowest ratio of concepts accurately mentioned and performed (48) to those mentioned out of context (23) or mentioned but only considered and not performed (14). Overall, 24.8% of FN cases are unimportant because of being out of context or not being performed by physicians.</p>
          <table-wrap position="float" id="table6">
            <label>Table 6</label>
            <caption>
              <p>Expert evaluation of false negative predictions using 4 fine-grained categories (for instance, surgery was not predicted to be in suffix, and it appears in the suffix).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="200"/>
              <col width="140"/>
              <col width="250"/>
              <col width="250"/>
              <col width="160"/>
              <thead>
                <tr valign="top">
                  <td>Concept</td>
                  <td>Mentioned and performed</td>
                  <td>Concept mentioned, but not needed for prediction</td>
                  <td>Mentioned but only considered by physician, not performed</td>
                  <td>Mentioned, but out of context</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Bronchoscopy</td>
                  <td>49</td>
                  <td>0</td>
                  <td>3</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>Cardiac surgery</td>
                  <td>40</td>
                  <td>0</td>
                  <td>6</td>
                  <td>0</td>
                </tr>
                <tr valign="top">
                  <td>Colonoscopy</td>
                  <td>26</td>
                  <td>0</td>
                  <td>13</td>
                  <td>5</td>
                </tr>
                <tr valign="top">
                  <td>Craniotomy</td>
                  <td>23</td>
                  <td>0</td>
                  <td>2</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>Dialysis procedure</td>
                  <td>46</td>
                  <td>0</td>
                  <td>6</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>Refractive surgery enhancement</td>
                  <td>48</td>
                  <td>0</td>
                  <td>14</td>
                  <td>23</td>
                </tr>
                <tr valign="top">
                  <td>Surgery</td>
                  <td>154</td>
                  <td>0</td>
                  <td>43</td>
                  <td>9</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our results show that when applied to clinical concept prediction in ICU patients, symmetric interpatient distance measures are more robust in terms of <italic>F</italic> measure, sensitivity, and precision. Furthermore, antisymmetric interpatient distance measures performed the best in terms of specificity. Hence, antisymmetric interpatient distance measures are more conservative when predicting future medical concepts, as explained by their high confidence thresholds and high levels of specificity, whereas symmetric interpatient distance measures observe a 10% gain in precision and sensitivity over antisymmetric measures. Thus, symmetric interpatient distance measures are more predictive of future medical concepts. Overall, the APL_SYM performed the best.</p>
        <p>We further evaluated the clinical value of the predictions. Our medical student and physician authors manually examined the TP and FN predictions of 16 important concepts. We found that 86.9% (353/406) of TP predictions are performed later, and only 4.7% (19/406) of the cases are totally out of context. This early concept prediction capability implies substantial impacts, such as avoiding potential high-risk events and improving patient outcomes at lower costs. On the other hand, our algorithm missed 513 FN cases, but 24.8% of them were clinically unimportant. Specifically, these missed concepts do appear in the patient suffixes but are out of context, or not needed, or not performed by the physician.</p>
        <p>As an example of an application of the proposed methods in a real setting, we considered using these methods to periodically automatically predict the estimated number of patients in a hospital that will require bronchoscopy. This may allow for better resource planning.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>We recognized that in its current form, our system is not sufficiently accurate for deployment. In particular, concern arises when giving a patient or their family access to our proposed methods—incorrectly predicting an undesired concept may incur unneeded stress and anxiety. In this regard, we may calibrate the confidence parameters to achieve higher precision and have an expert manually select the set of concepts that are appropriate to present to patients. As an example of a potential application, such a controlled prediction module could be deployed in a patient portal of a health insurance company, where a patient can already view his or her EHR.</p>
        <p>From a medical perspective, ICUs are often numerically oriented with vital signs, pressure readings, laboratory values, and ventilator readings. Furthermore, ICUs move at a fast pace, and hence, using the granularity of ward transfers is perhaps too broad in the ICU setting. Therefore, our proposed methods will most likely achieve different results in a primary care or outpatient setting. An interesting analysis would be to compare long-term predictions in the outpatient setting with near-term predictions in the ICU setting.</p>
        <p>However, the MIMIC database is one of the few, if not the only, publicly available databases of EHRs that are rich in both clinical notes and temporal data. Clinical notes enable a rich collection of clinical concepts and hence allow for the prediction of a broad range of clinical concepts. <italic>For example</italic>, an EHR database containing only disease classifications will represent diabetes but will fail to represent insulin; hence, insulin cannot be predicted. Furthermore, temporal data allow us to sort medical concepts into prefixes and suffixes.</p>
        <p>Another medical limitation is that we did not weigh concepts based on their clinical importance. For example, the concept of <italic>cardiac arrest</italic> is more important in terms of similarity and predictive value than the concept of <italic>coughing</italic>. Moreover, the importance of a clinical concept depends on its application and domain. Furthermore, we need to assess the accuracy required for our system to be useful to patients, clinicians, and researchers. This accuracy requirement could be assessed through user evaluations.</p>
        <p>From a technical perspective, a key limitation is the assumption that MetaMap correctly identifies all concepts written in a clinical note. MetaMap has achieved reasonable precision and recall values (80% and 79%, respectively) when identifying medical concepts from clinical notes [<xref ref-type="bibr" rid="ref34">34</xref>]. Given the raw text of a clinical note, this assumption is clearly invalid because of abbreviations in the clinical note and errors generated by MetaMap. We address abbreviations by using a manually crafted list of medical abbreviations common to clinical notes; thus, potential errors caused by ambiguities because of common abbreviations were minimized. Furthermore, we argue that errors generated by MetaMap are a natural language processing problem, which is beyond the scope of this study. This limitation of MetaMap also holds for any other automatic extraction tool. To mitigate this, our physician author manually evaluated the clinical significance of TP predictions for a subset of interesting concepts.</p>
        <p>Another technical limitation is that we evaluated our algorithm strictly, in that we only accepted predictions that exactly predicted the corresponding concept. <italic>For example</italic>, if we predicted <italic>cancer</italic> when the actual concept was <italic>breast cancer</italic>, then our prediction of cancer would be marked as an FP, when our prediction was semantically relevant. Hence, including semantically similar concepts, either through is-a (ISA) ancestors or other semantic relations, has the potential to increase the accuracy of our algorithm while remaining relevant to clinical decision support.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this paper, we studied the problem of predicting future medical concepts in a patient’s EHR. The key idea of our method was to find patients with similar EHR prefixes using various interpatient similarity measures and then predict medical concepts that have high confidence in EHR suffixes of those patients. Our results showed that this is a promising approach to predict possible future concepts in a patient’s EHR. Of the multiple symmetric and antisymmetric interpatient similarity measures, the APL_SYM achieved the highest accuracy in our evaluation. We further evaluated the predictions of 16 important concepts manually and found that 86.9% of TP predictions are performed later. These initial results indicate that predicting a patient’s future medical concepts is feasible.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Prediction performance results for important concepts selected by our physician author. We do not count the cases in which a predicted concept occurs in both the patient’s prefix and suffix.</p>
        <media xlink:href="medinform_v8i7e16008_app1.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Time from our algorithm prediction to the actual occurrence of the concepts in suffix for true positive cases (The time is formatted as dd hh:mm:ss, where dd is dropped if the time is less than a day).</p>
        <media xlink:href="medinform_v8i7e16008_app2.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">APL</term>
          <def>
            <p>average path length</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">APL_SYM</term>
          <def>
            <p>symmetric average path length</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BoC</term>
          <def>
            <p>bag-of-concept</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CA</term>
          <def>
            <p>common ancestor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DAG</term>
          <def>
            <p>directed acyclic graph</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">FN</term>
          <def>
            <p>false-negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">FP</term>
          <def>
            <p>false-positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ICU</term>
          <def>
            <p>intensive care unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">MIMIC</term>
          <def>
            <p>Multiparameter Intelligent Monitoring in Intensive Care</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SNOMED-CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine Clinical Terms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">TN</term>
          <def>
            <p>true-negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">TP</term>
          <def>
            <p>true-positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">UMLS</term>
          <def>
            <p>unified medical language system</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This project was partially supported by the National Science Foundation grants IIS-1838222, IIS-1619463, and IIS-1901379.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van de Belt</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Engelen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Berben</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Schoonhoven</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Definition of health 2.0 and medicine 2.0: a systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2010</year>
          <month>06</month>
          <day>11</day>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>e18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2010/2/e18/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1350</pub-id>
          <pub-id pub-id-type="medline">20542857</pub-id>
          <pub-id pub-id-type="pii">v12i2e18</pub-id>
          <pub-id pub-id-type="pmcid">PMC2956229</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Casey</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>NE</given-names>
            </name>
          </person-group>
          <article-title>Using electronic health records for population health research: a review of methods and applications</article-title>
          <source>Annu Rev Public Health</source>
          <year>2016</year>
          <volume>37</volume>
          <fpage>61</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26667605"/>
          </comment>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-032315-021353</pub-id>
          <pub-id pub-id-type="medline">26667605</pub-id>
          <pub-id pub-id-type="pmcid">PMC6724703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Swan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Emerging patient-driven health care models: an examination of health social networks, consumer personalized medicine and quantified self-tracking</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2009</year>
          <month>02</month>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>492</fpage>
          <lpage>525</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.mdpi.com/resolver?pii=ijerph6020492"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph6020492</pub-id>
          <pub-id pub-id-type="medline">19440396</pub-id>
          <pub-id pub-id-type="pii">ijerph6020492</pub-id>
          <pub-id pub-id-type="pmcid">PMC2672358</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wicks</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Keininger</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Massagli</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>de la Loge</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Isojärvi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Heywood</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Perceived benefits of sharing health data between people with epilepsy on an online platform</article-title>
          <source>Epilepsy Behav</source>
          <year>2012</year>
          <month>01</month>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>16</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1525-5050(11)00560-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.yebeh.2011.09.026</pub-id>
          <pub-id pub-id-type="medline">22099528</pub-id>
          <pub-id pub-id-type="pii">S1525-5050(11)00560-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frost</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Massagli</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Social uses of personal health information within PatientsLikeMe, an online patient community: what can happen when patients have access to one another's data</article-title>
          <source>J Med Internet Res</source>
          <year>2008</year>
          <month>05</month>
          <day>27</day>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>e15</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2008/3/e15/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1053</pub-id>
          <pub-id pub-id-type="medline">18504244</pub-id>
          <pub-id pub-id-type="pii">v10i3e15</pub-id>
          <pub-id pub-id-type="pmcid">PMC2553248</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frost</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Massagli</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Wicks</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Heywood</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>How the social web supports patient experimentation with a new therapy: the demand for patient-controlled and patient-centered informatics</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2008</year>
          <month>11</month>
          <day>6</day>
          <fpage>217</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18999176"/>
          </comment>
          <pub-id pub-id-type="medline">18999176</pub-id>
          <pub-id pub-id-type="pmcid">PMC2656086</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Longhurst</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Harrington</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>A 'green button' for using aggregate patient data at the point of care</article-title>
          <source>Health Aff (Millwood)</source>
          <year>2014</year>
          <month>07</month>
          <volume>33</volume>
          <issue>7</issue>
          <fpage>1229</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1377/hlthaff.2014.0099</pub-id>
          <pub-id pub-id-type="medline">25006150</pub-id>
          <pub-id pub-id-type="pii">33/7/1229</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cabitza</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rasoini</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gensini</surname>
              <given-names>GF</given-names>
            </name>
          </person-group>
          <article-title>Unintended consequences of machine learning in medicine</article-title>
          <source>J Am Med Assoc</source>
          <year>2017</year>
          <month>08</month>
          <day>8</day>
          <volume>318</volume>
          <issue>6</issue>
          <fpage>517</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2017.7797</pub-id>
          <pub-id pub-id-type="medline">28727867</pub-id>
          <pub-id pub-id-type="pii">2645762</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Markatou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Use abstracted patient-specific features to assist an information-theoretic measurement to assess similarity between medical cases</article-title>
          <source>J Biomed Inform</source>
          <year>2008</year>
          <month>12</month>
          <volume>41</volume>
          <issue>6</issue>
          <fpage>882</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(08)00044-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2008.03.006</pub-id>
          <pub-id pub-id-type="medline">18487093</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(08)00044-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC2584163</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mabotuwana</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen-Solal</surname>
              <given-names>EV</given-names>
            </name>
          </person-group>
          <article-title>An ontology-based similarity measure for biomedical data-application to radiology reports</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>10</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>857</fpage>
          <lpage>68</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00088-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.06.013</pub-id>
          <pub-id pub-id-type="medline">23850839</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(13)00088-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Plaza</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Díaz</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Retrieval of Similar Electronic Health Records Using UMLS Concept Graphs</article-title>
          <source>Proceedings of the International Conference on Application of Natural Language to Information Systems</source>
          <year>2010</year>
          <conf-name>NLDB'10</conf-name>
          <conf-date>June 23-25, 2010</conf-date>
          <conf-loc>Cardiff, United Kingdom</conf-loc>
          <fpage>296</fpage>
          <lpage>303</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-642-13881-2_31</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Parsons</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Morrison</surname>
              <given-names>FP</given-names>
            </name>
            <name name-style="western">
              <surname>Rothschild</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Markatou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Inter-patient distance metrics using SNOMED CT defining relationships</article-title>
          <source>J Biomed Inform</source>
          <year>2006</year>
          <month>12</month>
          <volume>39</volume>
          <issue>6</issue>
          <fpage>697</fpage>
          <lpage>705</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(06)00020-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2006.01.004</pub-id>
          <pub-id pub-id-type="medline">16554186</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(06)00020-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wongsuphasawat</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gotz</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Outflow: Visualizing Patient Flow by Symptoms and Outcome</article-title>
          <source>Proceedings of the IEEE VisWeek Workshop on Visual Analytics in Healthcare</source>
          <year>2011</year>
          <conf-name>IEEE VisWeek'11</conf-name>
          <conf-date>October 23, 2011</conf-date>
          <conf-loc>Providence, RI</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.semanticscholar.org/paper/Outflow-%3A-Visualizing-Patient-Flow-by-Symptoms-and-Wongsuphasawat-Gotz/f82bc74b05438a6739d51b78e4a64a78fc29a67b">https://www.semanticscholar.org/paper/Outflow-%3A-Visualizing-Patient-Flow-by-Symptoms-and-Wongsuphasawat-Gotz/f82bc74b05438a6739d51b78e4a64a78fc29a67b</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wongsuphasawat</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gotz</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Exploring flow, factors, and outcomes of temporal event sequences with the outflow visualization</article-title>
          <source>IEEE Trans Vis Comput Graph</source>
          <year>2012</year>
          <month>12</month>
          <volume>18</volume>
          <issue>12</issue>
          <fpage>2659</fpage>
          <lpage>68</lpage>
          <pub-id pub-id-type="doi">10.1109/TVCG.2012.225</pub-id>
          <pub-id pub-id-type="medline">26357175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Gotz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Perer</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A Visual Analysis Approach to Cohort Study of Electronic Patient Records</article-title>
          <source>Proceedings of the Conference on Bioinformatics and Biomedicine</source>
          <year>2014</year>
          <conf-name>BIBM'14</conf-name>
          <conf-date>November 2-5, 2014</conf-date>
          <conf-loc>Seattle, WA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/BIBM.2014.6999214</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wicks</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Massagli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Frost</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Okun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vaughan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bradley</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Heywood</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Sharing health data for better outcomes on PatientsLikeMe</article-title>
          <source>J Med Internet Res</source>
          <year>2010</year>
          <month>06</month>
          <day>14</day>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>e19</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2010/2/e19/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1549</pub-id>
          <pub-id pub-id-type="medline">20542858</pub-id>
          <pub-id pub-id-type="pii">v12i2e19</pub-id>
          <pub-id pub-id-type="pmcid">PMC2956230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shickel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tighe</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bihorac</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rashidi</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Deep EHR: a survey of recent advances in deep learning techniques for electronic health record (EHR) analysis</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2018</year>
          <month>09</month>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>1589</fpage>
          <lpage>604</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29989977"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/JBHI.2017.2767063</pub-id>
          <pub-id pub-id-type="medline">29989977</pub-id>
          <pub-id pub-id-type="pmcid">PMC6043423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kidd</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Deep patient: an unsupervised representation to predict the future of patients from the electronic health records</article-title>
          <source>Sci Rep</source>
          <year>2016</year>
          <month>05</month>
          <day>17</day>
          <volume>6</volume>
          <fpage>26094</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/srep26094"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep26094</pub-id>
          <pub-id pub-id-type="medline">27185194</pub-id>
          <pub-id pub-id-type="pii">srep26094</pub-id>
          <pub-id pub-id-type="pmcid">PMC4869115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Razavian</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Multi-Task Prediction of Disease Onsets from Longitudinal Laboratory Tests</article-title>
          <source>Proceedings of the 1st Machine Learning for Healthcare Conference</source>
          <year>2016</year>
          <conf-name>PMLR'16</conf-name>
          <conf-date>August 19-20, 2016</conf-date>
          <conf-loc>Los Angeles, CA</conf-loc>
          <fpage>73</fpage>
          <lpage>100</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://proceedings.mlr.press/v56/Razavian16.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipton</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Elkan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wetzel</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Learning to diagnose with LSTM recurrent neural networks</article-title>
          <source>arXiv preprint</source>
          <year>2015</year>
          <comment>epub ahead of print - 1511.03677<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1511.03677"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Doctor AI: Predicting Clinical Events via Recurrent Neural Networks</article-title>
          <source>Proceedings of the 1st Machine Learning for Healthcare Conference</source>
          <year>2016</year>
          <conf-name>MLHC'16</conf-name>
          <conf-date>August 19-20, 2016</conf-date>
          <conf-loc>Los Angeles, CA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wickramasinghe</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatesh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Deepr: a convolutional net for medical records</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2017</year>
          <month>01</month>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>22</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2016.2633963</pub-id>
          <pub-id pub-id-type="medline">27913366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajkomar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oren</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hajaj</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sundberg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duggan</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Irvine</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Litsch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mossin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tansuwan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wexler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ludwig</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Volchenboum</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Madabushi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Howell</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Scalable and accurate deep learning with electronic health records</article-title>
          <source>NPJ Digit Med</source>
          <year>2018</year>
          <month>05</month>
          <day>8</day>
          <volume>1</volume>
          <fpage>18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31304302"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-018-0029-1</pub-id>
          <pub-id pub-id-type="medline">31304302</pub-id>
          <pub-id pub-id-type="pii">29</pub-id>
          <pub-id pub-id-type="pmcid">PMC6550175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stearns</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Spackman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>SNOMED clinical terms: overview of the development process and project status</article-title>
          <source>Proc AMIA Symp</source>
          <year>2001</year>
          <fpage>662</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/11825268"/>
          </comment>
          <pub-id pub-id-type="medline">11825268</pub-id>
          <pub-id pub-id-type="pii">D010001608</pub-id>
          <pub-id pub-id-type="pmcid">PMC2243297</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Effective mapping of biomedical text to the UMLS metathesaurus: the MetaMap program</article-title>
          <source>Proc AMIA Symp</source>
          <year>2001</year>
          <fpage>17</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/11825149"/>
          </comment>
          <pub-id pub-id-type="medline">11825149</pub-id>
          <pub-id pub-id-type="pii">D010001275</pub-id>
          <pub-id pub-id-type="pmcid">PMC2243666</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saeed</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lieu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Raber</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>MIMIC II: a massive temporal ICU patient database to support research in intelligent patient monitoring</article-title>
          <source>Comput Cardiol</source>
          <year>2002</year>
          <volume>29</volume>
          <fpage>641</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="medline">14686455</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wiley</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hristidis</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Esterling</surname>
              <given-names>KM</given-names>
            </name>
          </person-group>
          <article-title>Pharmaceutical drugs chatter on online social networks</article-title>
          <source>J Biomed Inform</source>
          <year>2014</year>
          <month>06</month>
          <volume>49</volume>
          <fpage>245</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00063-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.03.006</pub-id>
          <pub-id pub-id-type="medline">24637141</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00063-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The unified medical language system (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>1</day>
          <volume>32</volume>
          <issue>Database Issue</issue>
          <fpage>D267</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <source>National Library of Medicine</source>
          <access-date>2019-08-21</access-date>
          <comment>Current Semantic Types<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.nlm.nih.gov/research/umls/META3_current_semantic_types.html">http://www.nlm.nih.gov/research/umls/META3_current_semantic_types.html</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <source>The Semantic Network: National Library of Medicine - NIH</source>
          <access-date>2019-08-21</access-date>
          <comment>The UMLS Semantic Network<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://semanticnetwork.nlm.nih.gov/">https://semanticnetwork.nlm.nih.gov/</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <source>NCBI</source>
          <year>2019</year>
          <access-date>2019-08-21</access-date>
          <comment>Metathesaurus: Original Release Format (ORF)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK9682/">https://www.ncbi.nlm.nih.gov/books/NBK9682/</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fosler-Lussier</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Learning to Temporally Order Medical Events in Clinical Text</article-title>
          <source>Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2012</year>
          <conf-name>ACL'12</conf-name>
          <conf-date>July 8, 2012</conf-date>
          <conf-loc>Jeju Island, Korea</conf-loc>
          <fpage>70</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/p14-1094</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tatarinov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Viglas</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Beyer</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Shanmugasundaram</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shekita</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Storing and Querying Ordered XML Using a Relational Database System</article-title>
          <source>Proceedings of the 2002 ACM SIGMOD international conference on Management of data</source>
          <year>2002</year>
          <conf-name>SIGMOD'02</conf-name>
          <conf-date>June 3-6, 2002</conf-date>
          <conf-loc>Madison, Wisconsin, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/564691.564715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Osborne</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Gyawali</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Solorio</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of YTEX and MetaMap for clinical concept recognition</article-title>
          <source>arXiv preprint</source>
          <year>2014</year>
          <comment>Preprint posted online, arXiv:1402.1668<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1402.1668"/></comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
