<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i1e15510</article-id>
      <article-id pub-id-type="pmid">32012067</article-id>
      <article-id pub-id-type="doi">10.2196/15510</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Longitudinal Risk Prediction of Chronic Kidney Disease in Diabetic Patients Using a Temporal-Enhanced Gradient Boosting Machine: Retrospective Cohort Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Johansson</surname>
            <given-names>Marcia</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>op den Buijs</surname>
            <given-names>Jorn</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Xing</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3712-2904</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Waitman</surname>
            <given-names>Lemuel R</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4748-2898</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Alan SL</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1776-2533</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Robbins</surname>
            <given-names>David C</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6890-440X</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>Yong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4417-9378</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Mei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>University of Kansas Medical Center</institution>
            <institution>Department of Internal Medicine, Division of Medical Informatics</institution>
            <addr-line>3901 Rainbow Boulevard</addr-line>
            <addr-line>Kansas City, KS, 66160</addr-line>
            <country>United States</country>
            <phone>1 9139456446</phone>
            <email>meiliu@kumc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8036-2110</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>University of Kansas Medical Center</institution>
        <institution>Department of Internal Medicine, Division of Medical Informatics</institution>
        <addr-line>Kansas City, KS</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>University of Kansas Medical Center</institution>
        <institution>Division of Nephrology and Hypertension and the Kidney Institute</institution>
        <addr-line>Kansas City, KS</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>University of Kansas Medical Center</institution>
        <institution>Diabetes Institute</institution>
        <addr-line>Kansas City, KS</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Jinan University</institution>
        <institution>Big Data Decision Institute</institution>
        <addr-line>Guangzhou</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Mei Liu <email>meiliu@kumc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>1</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>1</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>1</issue>
      <elocation-id>e15510</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>7</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>9</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>31</day>
          <month>10</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>31</day>
          <month>10</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Xing Song, Lemuel R Waitman, Alan SL Yu, David C Robbins, Yong Hu, Mei Liu. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 31.01.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/1/e15510/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Artificial intelligence–enabled electronic health record (EHR) analysis can revolutionize medical practice from the diagnosis and prediction of complex diseases to making recommendations in patient care, especially for chronic conditions such as chronic kidney disease (CKD), which is one of the most frequent complications in patients with diabetes and is associated with substantial morbidity and mortality.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The longitudinal prediction of health outcomes requires effective representation of temporal data in the EHR. In this study, we proposed a novel temporal-enhanced gradient boosting machine (GBM) model that dynamically updates and ensembles learners based on new events in patient timelines to improve the prediction accuracy of CKD among patients with diabetes.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Using a broad spectrum of deidentified EHR data on a retrospective cohort of 14,039 adult patients with type 2 diabetes and GBM as the base learner, we validated our proposed Landmark-Boosting model against three state-of-the-art temporal models for rolling predictions of 1-year CKD risk.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The proposed model uniformly outperformed other models, achieving an area under receiver operating curve of 0.83 (95% CI 0.76-0.85), 0.78 (95% CI 0.75-0.82), and 0.82 (95% CI 0.78-0.86) in predicting CKD risk with automatic accumulation of new data in later years (years 2, 3, and 4 since diabetes mellitus onset, respectively). The Landmark-Boosting model also maintained the best calibration across moderate- and high-risk groups and over time. The experimental results demonstrated that the proposed temporal model can not only accurately predict 1-year CKD risk but also improve performance over time with additionally accumulated data, which is essential for clinical use to improve renal management of patients with diabetes.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Incorporation of temporal information in EHR data can significantly improve predictive model performance and will particularly benefit patients who follow up with their physicians as recommended.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>diabetic kidney disease</kwd>
        <kwd>diabetic nephropathy</kwd>
        <kwd>chronic kidney disease</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>With the rapid development in digitization of health care data, the modern electronic health records (EHRs) hold considerable promise for driving scientific advances in various aspects of biomedicine through the utilization of machine learning techniques. EHRs contain not only diverse clinical data elements that can better describe a patient’s overall health status but also rich longitudinal data of patients that serve as a critical source for understanding the evolution of disease and management of chronic conditions. Developing accurate risk prediction models to drive timely initiation of appropriate therapies and monitoring is of paramount importance for conditions that have a substantial public health impact and can benefit greatly from early intervention.</p>
        <p>Chronic kidney disease (CKD), especially CKD attributed to diabetes, that is, diabetic kidney disease (DKD), certainly falls within this category [<xref ref-type="bibr" rid="ref1">1</xref>]. DKD is one of the most frequent and dangerous microvascular complications in diabetes mellitus (DM) that affects about 20% to 40% of patients with type 1 or type 2 DM [<xref ref-type="bibr" rid="ref2">2</xref>]. It is the leading cause of end-stage renal disease (ESRD), accounting for approximately 50% of the cases in the developed world, with major public health and economic implications [<xref ref-type="bibr" rid="ref3">3</xref>]. Therefore, annual screening is recommended for patients with type 1 and type 2 diabetes [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], which in turn has two implications: (1) there is a better chance for us to observe more regular and meaningful temporal patterns among these patients, and (2) an effective model for predicting the risk of DKD in the following year can be more beneficial for patients who are compliant with the annual check protocol because this allows implementation of early preventive measures.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>The effective use of temporal EHR data for predictive modeling remains a challenge owing to its highly variable sampling rates across different groups of patients (eg, patients may not follow the annual check protocol and only visit the hospital for critical health events) and distinct data types (eg, vital signs are noted hourly during inpatient encounters, whereas laboratory tests and medications are recorded when clinicians order them, and demographic data are more stable). Attempts have been made to handle temporal information in a variety of clinical applications. One approach involves representing the time series of clinical features with a single heuristic value (eg, taking the latest value or the trend [<xref ref-type="bibr" rid="ref6">6</xref>] or shrinking to a weighted sum of values with the <italic>weights</italic> determined by the timestamps [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]). Another approach is to preserve the underlying sequential order by mapping the time series into temporal patterns (eg, knowledge-based temporal abstraction or hidden Markov chains [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]) or symbolic representations (eg, the Symbolic Aggregate approXimation based on Gaussian quantiles and the temporal discretization for classification [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]). Moreover, deep learning techniques such as recurrent neural networks, in particular, long short-term memory and gated recurrent units, have contributed to modeling temporal events [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. However, it has also been reported in the corresponding work that many such approaches could suffer from high data sparsity or <italic>informative missingness</italic> and insufficient training data.</p>
        <p>In the prediction of kidney-related events, single-value abstraction is the most popular approach for its simplicity but at the expense of reduced temporal granularity. For example, in the ADVANCE prospective study for diabetic nephropathy, only baseline values of selected labs and vitals are used in a Cox proportional survival model [<xref ref-type="bibr" rid="ref16">16</xref>]. A multivariate Cox proportional survival model was developed for predicting ESRD based on mean- and variation-abstractions of repeated glycated hemoglobin (HbA<sub>1c</sub>), creatinine, and blood pressure measurements [<xref ref-type="bibr" rid="ref17">17</xref>]. More sophisticated use of temporal EHRs has also been studied, many of which were targeted at severe or acute kidney-related events. A Bayesian multiresolution hazard model for predicting CKD progression from stage III to stage IV attempted to capture temporal patterns by associating variables with piece-wise hazard increments at different time windows [<xref ref-type="bibr" rid="ref18">18</xref>], whereas an independent Markov process modeled the underlying sequential latent states for predicting the transition from CKD stage III to stage IV [<xref ref-type="bibr" rid="ref19">19</xref>]. A multitask linear model enabled knowledge transfer from one time window to another in the prediction of short-term renal function loss [<xref ref-type="bibr" rid="ref20">20</xref>], and a tree-based discrete-survival-like gradient boosting machine (GBM) predicting acute kidney injury in inpatients allowed the features and their association with outcome to be time variant and showed excellent performance [<xref ref-type="bibr" rid="ref21">21</xref>]. 
However, all of the aforementioned approaches require moderate to high manual effort on feature preselection and curation, which not only limits the scalability of the predictive models but also discards a considerable amount of information in each patient’s records [<xref ref-type="bibr" rid="ref15">15</xref>]. In addition, the complexity of EHR data often violates the linearity and independence assumptions for survival and linear models, resulting in worse predictions and impaired generalizability.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>In this study, we propose a new approach for incorporating the temporal information in the medical history of patients with diabetes to further improve the predictive model for evaluating their risk of renal complication in the next year. Because of its robustness, efficiency, and established efficacy in the prediction of kidney events [<xref ref-type="bibr" rid="ref21">21</xref>], we chose GBM as the base learner and augmented it with schemes to continuously update its learning results based on new patient inputs over a full breadth of EHR data on a yearly basis, named <italic>Landmark-Boosting</italic>. Here, the <italic>landmark</italic> time refers to an unbiased reference point (eg, <italic>t</italic> years since the onset of DM) at which we want to construct stagewise prediction models and make dynamic risk predictions using information collected up to that time [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. The final prediction model is then an ensemble of individual boosting models trained at each landmark time <italic>a priori</italic>.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Definition of Diabetes</title>
        <p>We adopted the Surveillance, Prevention, and Management of Diabetes Mellitus definition of diabetes in this study. Diabetes was defined based on the following: (1) the use of glucose-lowering medications (insulin or oral hypoglycemic medications); or (2) level of HbA<sub>1c</sub> of 6.5% or greater, random glucose of 200 mg/dL or greater, or fasting glucose of 126 mg/dL or greater on at least two different dates within 2 years; or (3) any two type 1 or type 2 DM diagnoses given on 2 different days within 2 years; or (4) any two distinct types of events among (1), (2), or (3); and (5) excluding any gestational diabetes (temporary glucose rise during pregnancy) [<xref ref-type="bibr" rid="ref24">24</xref>]. DM onset time was defined as the first occurrence of any events from (1) through (5).</p>
      </sec>
      <sec>
        <title>Definition of Diabetic Kidney Disease</title>
        <p>DKD was defined as diabetes with the presence of microalbuminuria or proteinuria, impaired glomerular filtration rate (GFR), or both [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Microalbuminuria was defined as albumin-to-creatinine ratio (ACR) being 30 mg/g or greater, and similarly, proteinuria was defined as urine protein-to-creatinine ratio being 30 mg/g or greater [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Impaired GFR was defined as the estimated GFR (eGFR), an age-, gender-, and race-adjusted serum creatinine concentration based on the Modification of Diet in Renal Disease equation [<xref ref-type="bibr" rid="ref27">27</xref>], being less than 60 mL/min/1.73 m<sup>2</sup>.</p>
      </sec>
      <sec>
        <title>Study Cohort</title>
        <p>The study constructed a retrospective cohort using deidentified EHR and billing data from November 2007 to December 2017 in the University of Kansas Medical Center’s integrated clinical data repository Healthcare Enterprise Repository for Ontological Narration (HERON) [<xref ref-type="bibr" rid="ref28">28</xref>]. The study did not require approval from the institutional review board because data used met the deidentification criteria specified in the Health Insurance Portability and Accountability Act Privacy Rule. The HERON Data Request Oversight Committee approved the data request. As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, a total of 35,779 adult patients with nongestational DM (age≥18 years) who had at least one valid eGFR or ACR record at an outpatient encounter were eligible for this study so that they could be identifiable as DKD present or not. We excluded patients presenting with any type 1 DM or cystic fibrosis–related diabetes diagnoses over their observation period and those who had kidney disease manifestation (eg, CKD diagnosis, low eGFR, or microalbuminuria) before the onset of DM. The case group included all DKD patients with their DKD onset time, or end point, defined as the first time of their abnormal eGFR or ACR. The control group was defined as patients with DM whose eGFR values were always above or equal to 60 mL/min/1.73 m<sup>2</sup> and had never had microalbuminuria, with their end point defined as the last time of their normal eGFR or ACR. Finally, 14,039 patients were included in the final cohort with 4785 (34.08%) patients with DKD.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study cohort inclusion and exclusion. Note that the counts of exclusions do not necessarily add up to the difference between the initial and final population, as 1 patient could satisfy multiple exclusion criteria. ACR: albumin-to-creatinine ratio; DKD: diabetic kidney disease; DM: diabetes mellitus; eGFR: estimated glomerular filtration rate.</p>
          </caption>
          <graphic xlink:href="medinform_v8i1e15510_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Clinical Variable Extraction</title>
        <p>According to our data, the heuristic time between 2 adjacent outpatient eGFR or ACR labs is on average 1 year per patient. Thus, for a patient <italic>i</italic>, a sequence of time-stamped examples (ie, DKD statuses, 1 for DKD and 0 for non-DKD) is identified based on their last outpatient eGFR or ACR collected annually, denoted as {y<sub>i</sub><sup>t</sup>}<sub>t</sub><sup>T</sup>. Note that a patient may be missing eGFR/ACR during certain years, and we kept the corresponding DKD status as <italic>NA</italic> without any imputation. For example, the outcome sequence for a patient can be (0, NA, 1), which can be interpreted, respectively, as “the patient did not have DKD the same year as DM onset, but cannot determine DKD status for the second year, and had DKD onset in the third year.”</p>
        <p>Each patient was then represented by collecting 15 common types of clinical observations from HERON [<xref ref-type="bibr" rid="ref28">28</xref>] (<xref ref-type="table" rid="table1">Table 1</xref>). Each category is a mixture of categorical and numerical data elements. Numeric values were used for laboratory tests and vital signs, whereas binary indicator variables were used for categorical features. In addition, we abstracted the Medication variables at the Semantic Clinical Drug Form or Semantic Clinical Brand Form level and Diagnoses variables at the International Classification of Diseases (ICD)-9 or 10 code level [<xref ref-type="bibr" rid="ref29">29</xref>]. We further decomposed clinical features into more meaningful pieces according to (1) different sources of a diagnosis (ie, billing diagnoses or EHR problem list diagnoses), (2) different aspects of a medication fact (ie, drug refill or drug amount), (3) different types of encounters where a procedure was ordered or performed (ie, inpatient or outpatient), and (4) different states of an alert (ie, fired or overridden). These data elements were extracted from our institutional EHR and had been explicitly incorporated in our data warehouse as an additional i2b2-specific attribute called <italic>modifier</italic> [<xref ref-type="bibr" rid="ref30">30</xref>]. Among the initial 22,331 distinct features available for our study cohort, 15,707 (70%) were only recorded for &#60;1% of the patients, which we excluded to reduce data sparsity.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Integrated data repository data domain categories.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="131"/>
            <col width="492"/>
            <col width="128"/>
            <col width="133"/>
            <col width="116"/>
            <thead>
              <tr valign="top">
                <td>Domain</td>
                <td>Descriptions</td>
                <td>Data type</td>
                <td>Number of eligible features<sup>a</sup></td>
                <td>Patients<sup>b</sup>, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Alerts</td>
                <td>Includes drug interactions, dose warnings, medication administration warnings, and best practice alerts</td>
                <td>Binary</td>
                <td>531</td>
                <td>11,848 (84.39)</td>
              </tr>
              <tr valign="top">
                <td>Allergy</td>
                <td>Includes documented allergies and reactions</td>
                <td>Binary</td>
                <td>49</td>
                <td>5044 (35.93)</td>
              </tr>
              <tr valign="top">
                <td>Demographics</td>
                <td>Basic demographics such as age, gender, race, etc, as well as their reachability, and some geographical information</td>
                <td>Binary/numeric</td>
                <td>10</td>
                <td>14,039 (100.00)</td>
              </tr>
              <tr valign="top">
                <td>Diagnoses</td>
                <td>Organized using ICD<sup>c</sup>-9 and ICD-10 hierarchies. Intelligent Medical Objects interface terms are grouped to ICD-9 and ICD-10 levels. Diagnosis resources are further separated by source of the assignment (eg, EMR<sup>d</sup>, professional billing, technical billing, and registry).</td>
                <td>Binary</td>
                <td>1186</td>
                <td>12,616 (89.86)</td>
              </tr>
              <tr valign="top">
                <td>History</td>
                <td>Contains family, social (ie, smoking), and surgical history from the EMR, as well as engineered features such as number of distinct clinical facts and clinical fact increments since last collection point</td>
                <td>Binary/numeric</td>
                <td>155</td>
                <td>12,178 (86.74)</td>
              </tr>
              <tr valign="top">
                <td>Laboratory tests</td>
                <td>Results of a variety of laboratory tests, including cardiology and microbiology findings. Note that the actual laboratory values are used in modeling, if available.</td>
                <td>Binary/numeric</td>
                <td>685</td>
                <td>11,990 (85.40)</td>
              </tr>
              <tr valign="top">
                <td>Medications</td>
                <td>Includes dispensing, administration, prescriptions, as well as home medication reconciliation at the University of Kansas Hospital grouped at Semantic Clinical Drug Form or Semantic Clinical Brand Form level. Medication resources are further separated by types of medication activity.</td>
                <td>Binary</td>
                <td>1205</td>
                <td>8295 (59.09)</td>
              </tr>
              <tr valign="top">
                <td>Procedures</td>
                <td>Includes Current Procedural Terminology professional services and inpatient ICD-9 billing procedure codes.</td>
                <td>Binary</td>
                <td>560</td>
                <td>12,460 (88.75)</td>
              </tr>
              <tr valign="top">
                <td>Orders</td>
                <td>Includes physician orders for nonmedications, such as culture and imaging orders from the EMR.</td>
                <td>Binary</td>
                <td>1053</td>
                <td>12,460 (88.75)</td>
              </tr>
              <tr valign="top">
                <td>Vizient (billing)</td>
                <td>(formerly University HealthSystem Consortium) Includes both billing classifications such as Diagnostic Related Groups, comorbidities, discharge placement, length of stay, and national quality metrics.</td>
                <td>Binary</td>
                <td>657</td>
                <td>3619 (25.78)</td>
              </tr>
              <tr valign="top">
                <td>Visit details</td>
                <td>Includes visit types, vital signs collected at the visit, discharge disposition, and clinical services providing care from both EMR and billing.</td>
                <td>Binary/numeric</td>
                <td>474</td>
                <td>13,671 (97.38)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>This does not include all distinct concepts from the entire Healthcare Enterprise Repository for Ontological Narration system; it only includes the total number of distinct features that had ever been recorded for at least one patient in the study cohort.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>This is the number of patients who have at least one observation during any time window recorded from the corresponding data domain.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>ICD: International Classification of Diseases.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>EMR: electronic medical record.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In <xref rid="figure2" ref-type="fig">Figure 2</xref>, we illustrated the feature densities over time across different data types. Each row corresponds to the average number of distinct clinical facts per patient for a data type over 5 years before and after DM onset. An evident heterogeneity of clinical activities before and after DM onset can be observed. For example, lab frequencies are much higher in the first 2 years of DM onset, with visits becoming more frequent after DM onset.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Clinical feature densities across data types. Each row corresponds to the average number of distinct clinical facts per patient for a certain type of clinical data over 5 years before and after DM onset. The darker the region is, the more distinct facts have been recorded for patients on average within the corresponding time window. DM: diabetes mellitus; UHC: University HealthSystem Consortium.</p>
          </caption>
          <graphic xlink:href="medinform_v8i1e15510_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In <xref ref-type="table" rid="table2">Table 2</xref>, we characterized the temporal variations by estimating the between-observation time, or observation intensity, for each data type and observed that the between-patient irregularity of sampling rates is significantly different from within-patient (<italic>P</italic>&#60;.001) based on the analysis of variance tests, except for demographics, suggesting varying degrees of health care exposure across patients and over time.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Clinical observation intensity.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="121"/>
            <col width="188"/>
            <col width="301"/>
            <col width="313"/>
            <col width="77"/>
            <thead>
              <tr valign="bottom">
                <td>Data type<sup>a</sup></td>
                <td>Mean time lapses (days)</td>
                <td>Within-patient standard deviation (days)</td>
                <td>Between-patient standard deviation (days)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Alerts</td>
                <td>67</td>
                <td>93</td>
                <td>146</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Allergy</td>
                <td>169</td>
                <td>158</td>
                <td>214</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Diagnoses</td>
                <td>87</td>
                <td>105</td>
                <td>133</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>History</td>
                <td>184</td>
                <td>230</td>
                <td>872</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Laboratory tests</td>
                <td>107</td>
                <td>122</td>
                <td>175</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Medications</td>
                <td>70</td>
                <td>70</td>
                <td>137</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Procedures</td>
                <td>74</td>
                <td>99</td>
                <td>132</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Orders</td>
                <td>81</td>
                <td>95</td>
                <td>127</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Vizient</td>
                <td>228</td>
                <td>189</td>
                <td>304</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Visit details</td>
                <td>36</td>
                <td>61</td>
                <td>70</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Demographics are not included as they are unique at the patient level.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Experimental Design</title>
        <p>For the clinical task of predicting DKD risk over the next year, we first randomly divided the 14,039 patients into training set (80%) for model development and validation set (20%) for performance evaluations. To simulate a more realistic clinical scenario and account for the bias caused by varying degrees of health care exposure over time, we stepped forward through patients’ time course and built prediction models at each landmark time, that is, every full year since DM onset, for rolling predictions of 1-year DKD risk. As such, individuals may contribute to or be tested by one or more prediction models, depending on their eligibility at the landmark time.</p>
      </sec>
      <sec>
        <title>Gradient Boosting Machine</title>
        <p>We chose GBMs as the baseline training model, which were then combined with four different approaches to incorporate temporal data. GBM is a family of powerful machine-learning techniques that have shown considerable success in a wide range of practical applications [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. We chose GBM as the base learner for its robustness against high dimensionality and collinearity and also because it embeds feature selection scheme within the process of model development [<xref ref-type="bibr" rid="ref37">37</xref>]. To better control overfitting, we tuned the hyperparameters (depth of trees: 2-10; learning rate: 0.01-0.1; minimal child weight: 1-10; number of trees is determined by early stopping, ie, if the holdout area under the receiver operating curve [AUROC] had not been improved for 100 rounds, then we stopped adding trees) within the training set using 10-fold cross-validations.</p>
      </sec>
      <sec>
        <title>Missing Values</title>
        <p>Missing values were handled in the following fashion: for categorical data, a value of 0 was set for missing, whereas for numerical data, a <italic>missing value split</italic> was always accounted for, and the <italic>best</italic> imputation value can be adaptively learned based on the improvement in training AUROC at each tree node within the ensemble [<xref ref-type="bibr" rid="ref38">38</xref>]. For example, if a variable <italic>X</italic> takes values (0, 1, 2, 3, NA, and NA), where <italic>NA</italic> stands for missing, the following two decisions will be made automatically at each split for each tree: (1) should we split based on <italic>missing or not</italic>? and (2) if we split based on values, for example, &#62;1 or ≤0, should we merge the missing cases with the bin of &#62;1 or ≤0?</p>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <p>We used AUROC and area under precision recall curve (AUPRC) to compare the overall prediction performance, with the latter known to be more robust to imbalanced datasets. In addition, we characterized calibration by the observed-to-expected outcome ratio (O:E), which measures agreement between the predicted and observed risk on average across observations. By treating testing examples with predicted probability of outcome in the top 40th percentile as positive cases, we made fair performance comparisons among different methods and further examined the model’s ability in detecting positive vs negative cases by reporting the sensitivity, specificity, positive predictive values (PPVs), and negative predictive values.</p>
      </sec>
      <sec>
        <title>Temporal Information Incorporation</title>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> depicts the four different approaches explored in this study for handling temporal EHR data: <italic>Latest-Value</italic> provides the most straightforward way to aggregate repeatedly measured variables; <italic>Stack-Temporal</italic> attempts to differentiate the effects of the same variable associated with different timestamps; and <italic>Discrete-Survival</italic> allows a survival analysis model to be created by using a binary classifier, which effectively enhances the chronological relationship between the predictors and the outcome. <italic>Landmark-Boosting</italic> is our proposed model motivated by the boosting method, which is designed to ensemble identification trees by learning over time. Each of the approaches is discussed in detail in the following sections.</p>
        <sec>
          <title>Latest-Value Approach</title>
          <p>In this approach, we simply collect the last observed value before each landmark time for each predictor across all time windows (<xref rid="figure3" ref-type="fig">Figure 3</xref>) [<xref ref-type="bibr" rid="ref16">16</xref>]. The Latest-Value approach is time agnostic, which implies it only retains the information about existence of certain predictors at the patient level. For example, the latest creatinine recorded for patient A can be 1 month ago but 1 year ago for patient B, which will be treated equally by this approach.</p>
        </sec>
        <sec>
          <title>Stack-Temporal Approach</title>
          <p>Given the variables for all time windows T, the Stack-Temporal approach concatenates the variables from all windows to represent patient <italic>x</italic><sub>i</sub> using a p-dimensional vector, where p=number of variables × T (<xref rid="figure3" ref-type="fig">Figure 3</xref>) [<xref ref-type="bibr" rid="ref20">20</xref>]. One of the disadvantages of this approach is that the feature dimensionality increases proportionally to T, which may lead to worse prediction performance because of overfitting.</p>
        </sec>
        <sec>
          <title>Discrete-Survival Approach</title>
          <p>The Discrete-Survival approach simulates a discrete-time survival framework by separating the full course of patient’s medical history into <italic>T</italic> nonoverlapping yearly windows, <italic>t</italic>=1,2,...,<italic>T</italic>, with variables from <italic>t</italic>-1 to predict DKD risk in <italic>t</italic> (<xref rid="figure3" ref-type="fig">Figure 3</xref>) [<xref ref-type="bibr" rid="ref21">21</xref>]. This approach assumes that examples from different time windows are independent of each other even if they may come from the same patient, which does not explicitly allow knowledge to be transferred from the previous time window to the next.</p>
        </sec>
        <sec>
          <title>Landmark-Boosting Approach</title>
          <p>To build the continuous learning mechanism, we developed a new method by extending the classical GBM to ensemble learners over time, that is, from one landmark time to the next (<xref rid="figure3" ref-type="fig">Figure 3</xref>). Specifically, we collected data <italic>D<sub>t</sub></italic>={(<italic>x<sub>it</sub> , y<sub>i</sub></italic>)} with <italic>i=1,2,…,N<sub>t</sub></italic> at each time window <italic>t</italic> and tried to solve the following optimization problem sequentially for all 1≤<italic>t≤T,</italic></p>
          <disp-formula>min <italic>E<sub>t&#124;t-1</sub></italic>[<italic>L</italic>(<italic>y, F<sub>t</sub></italic> (<italic>x<sub>t</sub>, F<sub>t-1</sub></italic>(<italic>x<sub>t-1</sub>,</italic> <italic>y<sub>t-1</sub></italic>)))] (1)</disp-formula>
          <p>where <italic>F</italic> represents the prediction function (ie, ensemble of trees), <italic>L</italic> represents the loss function (ie, logloss), and <italic>E<sub>t&#124;t-1</sub></italic> stands for conditional expectation at time <italic>t</italic> using observed values at time <italic>t</italic>-1. In other words, we used the predicted probability from time <italic>t</italic>-1 as the baseline risk and ensembled new learners based on predictors updated at time <italic>t</italic>. <xref rid="figure4" ref-type="fig">Figure 4</xref> presents the algorithm describing the detailed implementation steps.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Illustration of the temporal approaches, which are Latest-Value, Stack-Temporal, Discrete-Survival, and Landmark-Boosting from top to bottom. Different colors of circles represent different types of clinical data. Red triangles represent real values of the outcome (ie, diabetic kidney disease [DKD] or non-diabetic kidney disease in the following prediction window). Blue triangles represent predicted outcome based on clinical features presented in the previous observation window. Xti denotes all available clinical features collected strictly before landmark time ti (ie, number of full years since DM onset). yti denotes real label of DKD onset within the prediction window (ti, ti+1). DM: diabetes mellitus.</p>
            </caption>
            <graphic xlink:href="medinform_v8i1e15510_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Pseudocode for landmark boosting algorithm. In this experiment, Mt (the number of trees at each iteration is set to 1000), α (learning rate), and Ω(hMt) (levels of each tree) are hyperparameters tuned by 10-fold cross-validation on the training dataset at each iteration.</p>
            </caption>
            <graphic xlink:href="medinform_v8i1e15510_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Cohort Characteristics</title>
        <p>At each landmark time, the eligibility of a patient was determined by checking whether a valid eGFR or ACR reading was present in the current time window and whether the patient had neither developed DKD nor been censored in the previous time windows. As shown in <xref ref-type="table" rid="table3">Table 3</xref>, the number of eligible patients dropped over time with an increasing DKD rate, as a combined result of cases dropping out or being censored at the previous landmark time.</p>
        <p>There is a mild decreasing trend of age and race (white) proportion over the landmark times. In addition, we compared such case-mix shifts between training and testing sets and found no significant differences (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Case-mix shift over landmark time.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="329"/>
            <col width="113"/>
            <col width="111"/>
            <col width="171"/>
            <col width="132"/>
            <col width="144"/>
            <thead>
              <tr valign="top">
                <td>Landmark time (number of years since DM<sup>a</sup> onset)</td>
                <td>Eligible, n (%)</td>
                <td>DKD<sup>b</sup>, n (%)</td>
                <td>Age (years), mean (SD)</td>
                <td>Sex (male), n (%)</td>
                <td>Race (white), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td>10,705 (76.25)</td>
                <td>1673 (15.63)</td>
                <td>58 (13)</td>
                <td>5229 (48.84)</td>
                <td>7221 (67.45)</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>7755 (72.44)</td>
                <td>1467 (18.92)</td>
                <td>58 (13)</td>
                <td>3782 (48.77)</td>
                <td>5185 (66.86)</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>5689 (73.36)</td>
                <td>1163 (20.44)</td>
                <td>57 (13)</td>
                <td>2734 (48.06)</td>
                <td>3715 (65.30)</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>4113 (72.30)</td>
                <td>914 (22.22)</td>
                <td>56 (12)</td>
                <td>2002 (48.67)</td>
                <td>2671 (64.94)</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>3006 (73.09)</td>
                <td>740 (25.73)</td>
                <td>56 (12)</td>
                <td>1480 (49.23)</td>
                <td>1941 (64.57)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>DM: diabetes mellitus.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>DKD: diabetic kidney disease.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Case-mix shift in training and testing sets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="350"/>
            <col width="260"/>
            <col width="260"/>
            <col width="100"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Landmark time (number of years since DM<sup>a</sup> onset)</td>
                <td>Training (n=11,184)</td>
                <td>Testing (n=2855)</td>
                <td><italic>P</italic> value<sup>b</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Eligible</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>8524</td>
                <td>2181</td>
                <td>—<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>6174</td>
                <td>1581</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2</td>
                <td>4537</td>
                <td>1152</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3</td>
                <td>3254</td>
                <td>859</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4</td>
                <td>2366</td>
                <td>640</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Diabetic kidney disease, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>1352 (15.86)</td>
                <td>321 (14.72)</td>
                <td>.19</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>1174 (19.02)</td>
                <td>293 (18.53)</td>
                <td>.66</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2</td>
                <td>952 (20.98)</td>
                <td>211 (18.32)</td>
                <td>.05</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3</td>
                <td>732 (22.50)</td>
                <td>182 (21.19)</td>
                <td>.41</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4</td>
                <td>586 (24.77)</td>
                <td>154 (24.06)</td>
                <td>.71</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Age (years), mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>57.8 (13.1)</td>
                <td>57.4 (13.1)</td>
                <td>.98</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>57.6 (12.8)</td>
                <td>57.3 (12.7)</td>
                <td>.98</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2</td>
                <td>57.0 (12.6)</td>
                <td>56.9 (13.1)</td>
                <td>&#62;.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3</td>
                <td>56.4 (12.6)</td>
                <td>57.1 (12.0)</td>
                <td>.96</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4</td>
                <td>56.1 (12.3)</td>
                <td>56.7 (11.7)</td>
                <td>.99</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Sex (male), n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>4183 (49.07)</td>
                <td>1046 (47.96)</td>
                <td>.98</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>3023 (48.96)</td>
                <td>759 (48.01)</td>
                <td>.98</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2</td>
                <td>2208 (48.67)</td>
                <td>526 (45.66)</td>
                <td>.95</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3</td>
                <td>1593 (48.96)</td>
                <td>409 (47.61)</td>
                <td>.98</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4</td>
                <td>1173 (49.58)</td>
                <td>307 (47.97)</td>
                <td>.97</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Race (white), n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>5776 (67.76)</td>
                <td>1445 (66.25)</td>
                <td>.97</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>4145 (67.14)</td>
                <td>1040 (65.78)</td>
                <td>.97</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2</td>
                <td>2975 (65.57)</td>
                <td>740 (64.24)</td>
                <td>.97</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3</td>
                <td>2123 (65.24)</td>
                <td>548 (63.79)</td>
                <td>.95</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4</td>
                <td>1541 (65.13)</td>
                <td>400 (62.50)</td>
                <td>.89</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>DM: diabetes mellitus.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup><italic>P</italic> value is based on two-sample <italic>t</italic> test for age and two-sample proportion test for the other comparisons.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>The two-sample test is not applicable for the corresponding comparison.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Prediction Performance</title>
        <p>Overall, the prediction results in <xref rid="figure5" ref-type="fig">Figure 5</xref> showed that the proposed Landmark-Boosting model outperformed other temporal data representation methods with respect to all evaluation metrics. The Stack-Temporal approach always showed the worst performance, whereas the Latest-Value and Discrete-Survival approaches demonstrated competitive results. Only the Landmark-Boosting model had an increasing trend in AUROC over the years after DM onset, which peaked at <italic>t</italic>=2 with value of 0.83 (95% CI 0.76-0.85). AUPRC showed a steadily increasing performance of all approaches over time, whereas the Landmark-Boosting model dominated at each landmark time and reached 0.75 (95% CI 0.65-0.80) at <italic>t</italic>=4. Sensitivity declined slightly over time and achieved an optimal point at <italic>t</italic>=2 with the Landmark-Boosting model persistently outperforming others with a sensitivity of 83% (95% CI 79%-88%). In terms of specificity, Landmark-Boosting also outperformed others at each landmark time and achieved 78% (95% CI 74%-83%) at landmark time 4. Moreover, PPV improved over landmark time with the Landmark-Boosting approach showing the best performance reaching 67% (95% CI 57%-75%) at landmark time 4 (whereas the second-best model, Discrete-Survival, achieved 51% [95% CI 44%-57%]), translating to correct identification of 503 patients with DKD (whereas the second-best model only identified 383 patients with DKD).</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Performance comparisons among temporal approaches over landmark time. Area under receiver operating curve (AUROC) and area under the precision-recall curve (PRAUC) are first reported. For fair comparisons, sensitivity, specificity, positive predicted value, and negative predicted value are calculated by treating testing examples with predicted probability of outcome in the top 40th percentile as positive cases. Here, 95% bootstrap confidence intervals are reported for each metric at each landmark time (ie, full year since diabetes mellitus [DM] onset). The bootstrap confidence intervals are generated based on 30 bootstrapped samples, and used 2.5th percentile, 50th percentile, and 97.5th percentile to construct the confidence intervals for each metric.</p>
          </caption>
          <graphic xlink:href="medinform_v8i1e15510_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref rid="figure6" ref-type="fig">Figure 6</xref> presents regional calibration on the original predicted probability scale grouped into 20 bins. The <italic>overpredicted</italic> or <italic>underpredicted</italic> was defined as “the O:E ratio within a prediction bin that is significantly below or above 1 (<italic>P</italic> value&#60;.05),” whereas the remaining cases were considered <italic>calibrated</italic>. Clearly, the Landmark-Boosting approach also dominated all other temporal methods on calibration, with a dip of overestimation for the group with moderate risk at <italic>t</italic>=2. Both Latest-Value and Stack-Temporal models underestimated the risk, especially at <italic>t</italic>&#62;2. Discrete-Survival model appeared to overestimate the risk at early years for the low-risk group but tended to underestimate the risk in later years.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Calibration comparisons among temporal approaches over landmark time. Regions of calibration across the range of predicted probabilities, scaled by proportion of observations in each region and shaded by the magnitude of the within-region observed-to-expected ratio (O:E), with green suggests underprediction (ie, O:E significantly less than 1), and red suggests overprediction (ie, O:E significantly larger than 1). Pearson correlation coefficients between predicted and actual values over landmark times for each temporal model are included in the table below (the closer the coefficient is to 1, the better the predicted and actual values are linearly related). DM: diabetes mellitus.</p>
          </caption>
          <graphic xlink:href="medinform_v8i1e15510_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Case Study</title>
        <p>To closely examine the prediction change over time, we extracted a subset of 111 testing cases eligible at all five landmark times (ie, who had outcome sequence either like [0,0,0,0,0] or [0,0,0,0,1]) and plotted their predicted probability percentiles over years (<xref rid="figure7" ref-type="fig">Figure 7</xref>). We observed significant differences in the risk trajectory between patients with and without DKD depicted by the Landmark-Boosting method, with a much sharper increase of relative risk for most patients with DKD after year 1 and more obvious separation of risks over time. On the other hand, all other three methods suggested stable or even decreasing relative risk for patients with DKD over time, without much deviation from patients without DKD, with only a few exceptions.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>A visualization of predicted diabetic kidney disease (DKD) risk over landmark time. Risk percentiles (ie, normalized risk scores) against landmark time for a sample of patients. Each red line represents patient who finally progressed to DKD, whereas each green line represents patient who did not. DM: diabetes mellitus.</p>
          </caption>
          <graphic xlink:href="medinform_v8i1e15510_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The study results suggested that exploiting historical temporal EHR data in predictive models would significantly improve prediction performance, especially with our proposed Landmark-Boosting model. As demonstrated in <xref rid="figure5" ref-type="fig">Figure 5</xref>, the 4 different temporal models started with similar predictive power during the same year of DM onset but started to deviate along the landmark times. We observed a declining AUROC over time, with our proposed model being the only exception. One potential explanation is that the sensitivity of the other three models may be affected by the upward case-mix shift (<xref ref-type="table" rid="table3">Table 3</xref>), that is, the models’ ability to detect positive cases was impaired. For example, the optimal sensitivity of Stack-Temporal model seemed to top at the beginning but suffered a severe drop over time without any significant improvement of specificity, which may be a result of potential overfitting caused by increasing dimensionality. Within the first 2 years, the Latest-Value model seemed to yield a competitive sensitivity against the Landmark-Boosting model while the latter excelled afterward, indicating the effect of continuous self-correction mechanism that began to manifest after the second year since DM onset. A local peak of specificity presenting at year 2 for all four models implied a change in their <italic>interests</italic> toward the non-DKDs; however, only the Landmark-Boosting model kept the balance by preserving a good sensitivity. In contrast with AUROC, which has been criticized as being susceptible to class imbalance [<xref ref-type="bibr" rid="ref39">39</xref>], AUPRC demonstrated a steady trend of increase over landmark times for all temporal models, which was mainly attributable to PPV improvement, indicating that the signals from DKD samples may have become stronger over time, likely as a result of increasing DKD prevalence over the landmark years. 
Nonetheless, the proposed Landmark-Boosting model dominated the others and even showed increasing margins along landmark times. For instance, the Landmark-Boosting model identified 46, 36, and 120 more true cases than the second-best model (91, 72, and 135 more than the nontemporal Latest-Value model) at 2, 3, and 4 years. Moreover, the Landmark-Boosting model was clearly better than the other models on calibration that never underestimated the risks (<xref rid="figure6" ref-type="fig">Figure 6</xref>), whereas the Stack-Temporal model also seemed to be well calibrated within the first 2 years of DM onset.</p>
      </sec>
      <sec>
        <title>Clinical Implications</title>
        <p>Our proposed temporal model will benefit patients with longitudinal data, and the longer we follow up, the better the model can predict the next-year DKD risk by self-adjustment with respect to both the individual’s medical history and population shift over time. The study has three important implications. First, our investigation confirmed that temporal EHR and billing data carry critical information depicting the progression of the patient’s condition, and it is important to choose the appropriate method for incorporating longitudinal data to promote the <italic>predictivity</italic> of modern medicine. Second, by allowing the model to evolve along patients’ landmark times, we not only reduced the biases related to a patient’s exposure within the EHR but also simulated a scenario that mirrors the clinical practice for annual screening. Third, unlike prior predictive analyses that were mostly population based [<xref ref-type="bibr" rid="ref40">40</xref>] or personalized longitudinal models requiring complete patient history [<xref ref-type="bibr" rid="ref10">10</xref>], our model sought a middle ground, aiming to weave together information at both population and individual levels; for example, the GBM built at each landmark time is an attempt to fit the concurrent population, whereas the carrying over of last individual predictions is for the purpose of preserving personal information.</p>
        <p>Our model can continually calculate kidney disease risk for patients with diabetes with automatic collection of new EHR data and improve prediction over time. The ability to precisely stratify patients with diabetes by their renal complication risk in the coming year would merit a variety of potential intervention designs: (1) <italic>nutritional interventions</italic> that differentiate dietary consultation according to relative DKD risk, for example, presenting dietary flyers to all patients with type 2 DM but arranging in-person consultation sessions for those in the high-risk bin with dietitians knowledgeable in CKD diet; (2) <italic>lifestyle interventions</italic> that encourage personalized health-promoting behaviors such as smoking cessation and physical activity at different intensity levels based on their DKD risk; (3) <italic>medication management</italic> by designing targeted strategies according to the risk to encourage patient medication compliance, especially with blood pressure and glucose control medications, and warn patients and physicians against the use of nephrotoxic medications, for example, nonsteroidal anti-inflammatory drugs, unless absolutely necessary for high-risk patients because patients with diabetes are already at a higher risk for developing transient decreases in renal function consistent with acute kidney injury, and nephrotoxic drug exposure can amplify that risk. Moreover, with the DKD risk factor discovery framework developed in our previous work [<xref ref-type="bibr" rid="ref41">41</xref>], we can further empower the predictive models by outputting explainable risk factors and quantifying their effects on DKD specific to subgroups within different risk bins to better support physicians in designing tailored therapy and management strategies. 
More importantly, the Landmark-Boosting model almost never underestimated the risk compared with other models, especially among the high-risk group, which is clinically ideal because timely medication management can be effective in protecting high-risk patients from unnecessary harm to the kidney due to the use of nephrotoxic medications.</p>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <p>There are several limitations to our work. Disease diagnosis sequence is not necessarily the same as the disease manifestation sequence, which may lead to the underestimation of false-negative rates for DKD in this study. For example, our exclusion criteria may have excluded patients with DKD who visited our hospital for their kidney disease but had not yet had their diabetes-related information recorded in our EHR. In addition, the current design of our model is not robust against population drift because of changes in practice over time or differences in clinical vocabulary and workflow implemented across institutions. To further investigate the generalizability of our model, it is necessary to perform external validations and adequate recalibration based on patients from different sites as well as over calendar years to capture the general population shift and practice change.</p>
        <p>Although not the focus of this paper, we further examined the factors that potentially contributed to the superiority of the Landmark-Boosting model. In <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, we present the top 50 important features selected by the Landmark-Boosting model and their varying rankings among the other temporal models. Only a few important variables were common across all models (eg, age at DM onset and creatinine). Most top-ranked factors by the Landmark-Boosting model were less important in the other three temporal models (eg, previous visit to cardiovascular clinic, triglycerides, glucose, and exposure to codeine derivative). Furthermore, we examined the features that may contribute to improving the performance of the Landmark-Boosting model over time. As shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, we collected the top 30 important features at year 4 and backtracked their rankings in previous years. For each feature, we calculated the Pearson correlation coefficient between ranking and landmark time to determine if the feature ranking increased or decreased significantly over time. Factors showing improved predictive power over time included cumulative clinical fact counts, previous visit to cardiovascular clinic, systolic blood pressure, triglycerides, and alanine aminotransferase. Building on these preliminary findings, we plan to further characterize and evaluate the changing feature representations over time in our future work.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study addressed the problem of underutilization of temporal information in EHR-based predictive models. We proposed a new approach to leveraging the temporal dynamics in EHR to improve DKD prediction and validated it against three state-of-the-art models using the idea of <italic>landmark time</italic> to simulate real clinical utility. Experimental results demonstrated that the proposed Landmark-Boosting model can effectively capture temporal dynamics in EHR without overfitting and further improve for patients with a longer follow-up time.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Variable importance ranking across models and over time.</p>
        <media xlink:href="medinform_v8i1e15510_app1.docx" xlink:title="DOCX File, 165 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACR</term>
          <def>
            <p>albumin-to-creatinine ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUPRC</term>
          <def>
            <p>area under the precision-recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CKD</term>
          <def>
            <p>chronic kidney disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DKD</term>
          <def>
            <p>diabetic kidney disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">DM</term>
          <def>
            <p>diabetes mellitus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">eGFR</term>
          <def>
            <p>estimated glomerular filtration rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ESRD</term>
          <def>
            <p>end-stage renal disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">GBM</term>
          <def>
            <p>gradient boosting machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">GFR</term>
          <def>
            <p>glomerular filtration rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">HbA<sub>1c</sub></term>
          <def>
            <p>glycated hemoglobin</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">HERON</term>
          <def>
            <p>Healthcare Enterprise Repository for Ontological Narration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>YH is supported by the Major Research Plan of the National Natural Science Foundation of China (Key Program, grant number 91746204) and a grant award from the Science and Technology Department in Guangdong Province (Major Projects of Advanced and Key Techniques Innovation, grant number 2017B030308008). The dataset used for analysis described in this study was obtained from the University of Kansas Medical Center’s HERON clinical data repository, which is supported by institutional funding and by the University of Kansas Medical Center Clinical and Translational Science Award grant UL1TR002366 from the National Center for Advancing Translational Sciences.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Screening for kidney disease in adults with diabetes and prediabetes</article-title>
          <source>Curr Opin Nephrol Hypertens</source>
          <year>2005</year>
          <month>05</month>
          <volume>14</volume>
          <issue>3</issue>
          <fpage>249</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1097/01.mnh.0000165891.67878.7f</pub-id>
          <pub-id pub-id-type="medline">15821418</pub-id>
          <pub-id pub-id-type="pii">00041552-200505000-00010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Persson</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rossing</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Diagnosis of diabetic kidney disease: state of the art and future perspective</article-title>
          <source>Kidney Int Suppl (2011)</source>
          <year>2018</year>
          <month>01</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>2</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30675433"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.kisu.2017.10.003</pub-id>
          <pub-id pub-id-type="medline">30675433</pub-id>
          <pub-id pub-id-type="pii">S2157-1716(17)30060-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6336222</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tuttle</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Bakris</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Bilous</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Chiang</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>de Boer</surname>
              <given-names>IH</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein-Fuchs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hirsch</surname>
              <given-names>IB</given-names>
            </name>
            <name name-style="western">
              <surname>Kalantar-Zadeh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Narva</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Navaneethan</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Neumiller</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>UD</given-names>
            </name>
            <name name-style="western">
              <surname>Ratner</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Whaley-Connell</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Molitch</surname>
              <given-names>ME</given-names>
            </name>
          </person-group>
          <article-title>Diabetic kidney disease: a report from an ADA Consensus Conference</article-title>
          <source>Diabetes Care</source>
          <year>2014</year>
          <month>10</month>
          <volume>37</volume>
          <issue>10</issue>
          <fpage>2864</fpage>
          <lpage>83</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25249672"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc14-1296</pub-id>
          <pub-id pub-id-type="medline">25249672</pub-id>
          <pub-id pub-id-type="pii">37/10/2864</pub-id>
          <pub-id pub-id-type="pmcid">PMC4170131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Molitch</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>DeFronzo</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Franz</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Mogensen</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Parving</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Steffes</surname>
              <given-names>MW</given-names>
            </name>
            <collab>American Diabetes Association</collab>
          </person-group>
          <article-title>Nephropathy in diabetes</article-title>
          <source>Diabetes Care</source>
          <year>2004</year>
          <month>01</month>
          <volume>27</volume>
          <issue>Suppl 1</issue>
          <fpage>S79</fpage>
          <lpage>83</lpage>
          <pub-id pub-id-type="doi">10.2337/diacare.27.2007.s79</pub-id>
          <pub-id pub-id-type="medline">14693934</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gross</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>de Azevedo</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Silveiro</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Canani</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Caramori</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Zelmanovitz</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Diabetic nephropathy: diagnosis, prevention, and treatment</article-title>
          <source>Diabetes Care</source>
          <year>2005</year>
          <month>01</month>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>164</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.2337/diacare.28.1.164</pub-id>
          <pub-id pub-id-type="medline">15616252</pub-id>
          <pub-id pub-id-type="pii">28/1/164</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Orphanou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Stassopoulou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Keravnou</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Temporal abstraction and temporal Bayesian networks in clinical domains: a survey</article-title>
          <source>Artif Intell Med</source>
          <year>2014</year>
          <month>03</month>
          <volume>60</volume>
          <issue>3</issue>
          <fpage>133</fpage>
          <lpage>49</lpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2013.12.007</pub-id>
          <pub-id pub-id-type="medline">24529699</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(14)00002-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Henriksson</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Learning temporal weights of clinical events using variable importance</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2016</year>
          <month>07</month>
          <day>21</day>
          <volume>16</volume>
          <issue>Suppl 2</issue>
          <fpage>71</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-016-0311-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-016-0311-6</pub-id>
          <pub-id pub-id-type="medline">27459993</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-016-0311-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4965710</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Augusto</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Temporal reasoning for decision support in medicine</article-title>
          <source>Artif Intell Med</source>
          <year>2005</year>
          <month>01</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2004.07.006</pub-id>
          <pub-id pub-id-type="medline">15617978</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(04)00106-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shahar</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A framework for knowledge-based temporal abstraction</article-title>
          <source>Artif Intell</source>
          <year>1997</year>
          <volume>90</volume>
          <issue>1-2</issue>
          <fpage>79</fpage>
          <lpage>133</lpage>
          <pub-id pub-id-type="doi">10.1016/S0004-3702(96)00025-2</pub-id>
          <pub-id pub-id-type="pii">S0004-3702(96)00025-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ramamohanarao</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Septic shock prediction for ICU patients via coupled HMM walking on sequential contrast patterns</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>02</month>
          <volume>66</volume>
          <fpage>19</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(16)30184-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2016.12.010</pub-id>
          <pub-id pub-id-type="medline">28011233</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(16)30184-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Keogh</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lonardi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Experiencing SAX: a novel symbolic representation of time series</article-title>
          <source>Data Min Knowl Discov</source>
          <year>2007</year>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>107</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1007/s10618-007-0064-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moskovitch</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Shahar</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Classification-driven temporal discretization of multivariate time series</article-title>
          <source>Data Min Knowl Discov</source>
          <year>2014</year>
          <month>10</month>
          <day>2</day>
          <volume>29</volume>
          <issue>4</issue>
          <fpage>871</fpage>
          <lpage>913</lpage>
          <pub-id pub-id-type="doi">10.1007/s10618-014-0380-z</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10618-014-0380-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC6436636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rasmy</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Geng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhi</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A study of generalizability of recurrent neural network-based predictive models for heart failure onset risk using a large and heterogeneous EHR data set</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>08</month>
          <volume>84</volume>
          <fpage>11</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30117-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.06.011</pub-id>
          <pub-id pub-id-type="medline">29908902</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30117-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6076336</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Che</surname>
              <given-names>ZP</given-names>
            </name>
            <name name-style="western">
              <surname>Purushotham</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Recurrent neural networks for multivariate time series with missing values</article-title>
          <source>Sci Rep</source>
          <year>2018</year>
          <month>04</month>
          <day>17</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>6085</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/s41598-018-24271-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-018-24271-9</pub-id>
          <pub-id pub-id-type="medline">29666385</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-018-24271-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC5904216</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajkomar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oren</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hajaj</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sundberg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duggan</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Irvine</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Litsch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mossin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tansuwan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wexler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ludwig</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Volchenboum</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Madabushi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Howell</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Scalable and accurate deep learning with electronic health records</article-title>
          <source>NPJ Digit Med</source>
          <year>2018</year>
          <volume>1</volume>
          <fpage>18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31304302"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-018-0029-1</pub-id>
          <pub-id pub-id-type="medline">31304302</pub-id>
          <pub-id pub-id-type="pii">29</pub-id>
          <pub-id pub-id-type="pmcid">PMC6550175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jardine</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hata</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Woodward</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perkovic</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ninomiya</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Arima</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zoungas</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cass</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marre</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mancia</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mogensen</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Poulter</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chalmers</surname>
              <given-names>J</given-names>
            </name>
            <collab>ADVANCE Collaborative Group</collab>
          </person-group>
          <article-title>Prediction of kidney-related outcomes in patients with type 2 diabetes</article-title>
          <source>Am J Kidney Dis</source>
          <year>2012</year>
          <month>11</month>
          <volume>60</volume>
          <issue>5</issue>
          <fpage>770</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1053/j.ajkd.2012.04.025</pub-id>
          <pub-id pub-id-type="medline">22694950</pub-id>
          <pub-id pub-id-type="pii">S0272-6386(12)00765-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of a risk prediction model for end-stage renal disease in patients with type 2 diabetes</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <month>08</month>
          <day>31</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>10177</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-017-09243-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-017-09243-9</pub-id>
          <pub-id pub-id-type="medline">28860599</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-017-09243-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC5579050</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hagar</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Albers</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pivovarov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chase</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dukic</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Survival analysis with electronic health record data: experiments with chronic kidney disease</article-title>
          <source>Stat Anal Data Min</source>
          <year>2014</year>
          <volume>7</volume>
          <issue>5</issue>
          <fpage>385</fpage>
          <lpage>403</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/sam.11236"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/sam.11236</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perotte</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganath</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hirsch</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Risk prediction for chronic kidney disease progression using heterogeneous electronic health record data and time series analysis</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>07</month>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>872</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25896647"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv024</pub-id>
          <pub-id pub-id-type="medline">25896647</pub-id>
          <pub-id pub-id-type="pii">ocv024</pub-id>
          <pub-id pub-id-type="pmcid">PMC4482276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gottesman</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Bottinger</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Guttag</surname>
              <given-names>JV</given-names>
            </name>
          </person-group>
          <article-title>Incorporating temporal EHR data in predictive models for risk stratification of renal function deterioration</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>02</month>
          <volume>53</volume>
          <fpage>220</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00235-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.11.005</pub-id>
          <pub-id pub-id-type="medline">25460205</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00235-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4520404</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koyner</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Edelson</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Churpek</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>The development of a machine learning inpatient acute kidney injury prediction model</article-title>
          <source>Crit Care Med</source>
          <year>2018</year>
          <month>07</month>
          <volume>46</volume>
          <issue>7</issue>
          <fpage>1070</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1097/CCM.0000000000003123</pub-id>
          <pub-id pub-id-type="medline">29596073</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dafni</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Landmark analysis at the 25-year landmark point</article-title>
          <source>Circ Cardiovasc Qual Outcomes</source>
          <year>2011</year>
          <month>05</month>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>363</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1161/CIRCOUTCOMES.110.957951</pub-id>
          <pub-id pub-id-type="medline">21586725</pub-id>
          <pub-id pub-id-type="pii">4/3/363</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wells</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chagin</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kattan</surname>
              <given-names>MW</given-names>
            </name>
          </person-group>
          <article-title>Using the landmark method for creating prediction models in large datasets derived from electronic health records</article-title>
          <source>Health Care Manag Sci</source>
          <year>2015</year>
          <month>03</month>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>86</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.1007/s10729-014-9281-3</pub-id>
          <pub-id pub-id-type="medline">24752545</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nichols</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Desai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lafata</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Lawrence</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Raebel</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Reid</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Selby</surname>
              <given-names>JV</given-names>
            </name>
            <name name-style="western">
              <surname>Silverman</surname>
              <given-names>BG</given-names>
            </name>
            <name name-style="western">
              <surname>Steiner</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Vupputuri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Waitzfelder</surname>
              <given-names>B</given-names>
            </name>
            <collab>SUPREME-DM Study Group</collab>
          </person-group>
          <article-title>Construction of a multisite DataLink using electronic health records for the identification, surveillance, prevention, and management of diabetes mellitus: the SUPREME-DM project</article-title>
          <source>Prev Chronic Dis</source>
          <year>2012</year>
          <volume>9</volume>
          <fpage>E110</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/pcd/issues/2012/11_0311.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.5888/pcd9.110311</pub-id>
          <pub-id pub-id-type="medline">22677160</pub-id>
          <pub-id pub-id-type="pii">E110</pub-id>
          <pub-id pub-id-type="pmcid">PMC3457753</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>KDOQI</collab>
          </person-group>
          <article-title>KDOQI clinical practice guidelines and clinical practice recommendations for diabetes and chronic kidney disease</article-title>
          <source>Am J Kidney Dis</source>
          <year>2007</year>
          <month>02</month>
          <volume>49</volume>
          <issue>2 Suppl 2</issue>
          <fpage>S12</fpage>
          <lpage>154</lpage>
          <pub-id pub-id-type="doi">10.1053/j.ajkd.2006.12.005</pub-id>
          <pub-id pub-id-type="medline">17276798</pub-id>
          <pub-id pub-id-type="pii">S0272-6386(06)01843-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>American Diabetes Association</collab>
          </person-group>
          <article-title>Standards of medical care in diabetes-2018 abridged for primary care providers</article-title>
          <source>Clin Diabetes</source>
          <year>2018</year>
          <month>01</month>
          <volume>36</volume>
          <issue>1</issue>
          <fpage>14</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29382975"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/cd17-0119</pub-id>
          <pub-id pub-id-type="medline">29382975</pub-id>
          <pub-id pub-id-type="pii">14</pub-id>
          <pub-id pub-id-type="pmcid">PMC5775000</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levey</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Coresh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Greene</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>YL</given-names>
            </name>
            <name name-style="western">
              <surname>Hendriksen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kusek</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>van Lente</surname>
              <given-names>F</given-names>
            </name>
            <collab>Chronic Kidney Disease Epidemiology Collaboration</collab>
          </person-group>
          <article-title>Using standardized serum creatinine values in the modification of diet in renal disease study equation for estimating glomerular filtration rate</article-title>
          <source>Ann Intern Med</source>
          <year>2006</year>
          <month>08</month>
          <day>15</day>
          <volume>145</volume>
          <issue>4</issue>
          <fpage>247</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-145-4-200608150-00004</pub-id>
          <pub-id pub-id-type="medline">16908915</pub-id>
          <pub-id pub-id-type="pii">145/4/247</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Waitman</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Warren</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Manos</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Connolly</surname>
              <given-names>DW</given-names>
            </name>
          </person-group>
          <article-title>Expressing observations from electronic medical record flowsheets in an i2b2 based clinical data repository to support research and quality improvement</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>1454</fpage>
          <lpage>63</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22195209"/>
          </comment>
          <pub-id pub-id-type="medline">22195209</pub-id>
          <pub-id pub-id-type="pmcid">PMC3243191</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mendis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chueh</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Serving the enterprise and beyond with informatics for integrating biology and the bedside (i2b2)</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>2</issue>
          <fpage>124</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20190053"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.000893</pub-id>
          <pub-id pub-id-type="medline">20190053</pub-id>
          <pub-id pub-id-type="pii">17/2/124</pub-id>
          <pub-id pub-id-type="pmcid">PMC3000779</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Waitman</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>An exploration of ontology-based EMR data abstraction for diabetic kidney disease prediction</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2019</year>
          <volume>2019</volume>
          <fpage>704</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31259027"/>
          </comment>
          <pub-id pub-id-type="medline">31259027</pub-id>
          <pub-id pub-id-type="pmcid">PMC6568123</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Damle</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Alavi</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>The University Healthsystem Consortium clinical database: An emerging resource in colorectal surgery research</article-title>
          <source>Semin Colon Rectal Surg</source>
          <year>2016</year>
          <month>06</month>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>92</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1053/j.scrs.2016.01.006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutchinson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>LP</given-names>
            </name>
            <name name-style="western">
              <surname>Dietterich</surname>
              <given-names>TG</given-names>
            </name>
          </person-group>
          <article-title>Incorporating Boosted Regression Trees Into Ecological Latent Variable Models</article-title>
          <source>Proceedings of the Twenty-Fifth AAAI Conference on Artificial Intelligence</source>
          <year>2011</year>
          <conf-name>AAAI'11</conf-name>
          <conf-date>August 7-11, 2011</conf-date>
          <conf-loc>San Francisco, California</conf-loc>
          <fpage>1343</fpage>
          <lpage>8</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Learning nonlinear functions using regularized greedy forest</article-title>
          <source>IEEE Trans Pattern Anal Mach Intell</source>
          <year>2014</year>
          <month>05</month>
          <volume>36</volume>
          <issue>5</issue>
          <fpage>942</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.1109/TPAMI.2013.159</pub-id>
          <pub-id pub-id-type="medline">26353228</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Amos</surname>
              <given-names>CI</given-names>
            </name>
            <name name-style="western">
              <surname>Hyslop</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Component-wise gradient boosting and false discovery control in survival analysis with high-dimensional covariates</article-title>
          <source>Bioinformatics</source>
          <year>2016</year>
          <month>01</month>
          <day>1</day>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>50</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26382192"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btv517</pub-id>
          <pub-id pub-id-type="medline">26382192</pub-id>
          <pub-id pub-id-type="pii">btv517</pub-id>
          <pub-id pub-id-type="pmcid">PMC4757968</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Torlay</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone-Bertolotti</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Baciu</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Machine learning-XGBoost analysis of language networks to classify patients with epilepsy</article-title>
          <source>Brain Inform</source>
          <year>2017</year>
          <month>09</month>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>159</fpage>
          <lpage>69</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://link.springer.com/article/10.1007/s40708-017-0065-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40708-017-0065-7</pub-id>
          <pub-id pub-id-type="medline">28434153</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40708-017-0065-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5563301</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kalousis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Prados</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hilario</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Stability of feature selection algorithms: a study on high-dimensional spaces</article-title>
          <source>Knowl Inf Syst</source>
          <year>2007</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>95</fpage>
          <lpage>116</lpage>
          <pub-id pub-id-type="doi">10.1007/s10115-006-0040-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Greedy function approximation: a gradient boosting machine</article-title>
          <source>Ann Stat</source>
          <year>2001</year>
          <month>10</month>
          <volume>29</volume>
          <issue>5</issue>
          <fpage>1189</fpage>
          <lpage>232</lpage>
          <pub-id pub-id-type="doi">10.1214/aos/1013203451</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: A Scalable Tree Boosting System</article-title>
          <source>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2016</year>
          <conf-name>KDD'16</conf-name>
          <conf-date>August 13-17, 2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <fpage>785</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Goadrich</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The relationship between Precision-Recall and ROC curves</article-title>
          <source>Proceedings of the 23rd international conference on Machine learning</source>
          <year>2006</year>
          <conf-name>ICML'06</conf-name>
          <conf-date>June 25-29, 2006</conf-date>
          <conf-loc>Pittsburgh, PA</conf-loc>
          <fpage>233</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1145/1143844.1143874</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hood</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Friend</surname>
              <given-names>SH</given-names>
            </name>
          </person-group>
          <article-title>Predictive, personalized, preventive, participatory (P4) cancer medicine</article-title>
          <source>Nat Rev Clin Oncol</source>
          <year>2011</year>
          <month>03</month>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>184</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1038/nrclinonc.2010.227</pub-id>
          <pub-id pub-id-type="medline">21364692</pub-id>
          <pub-id pub-id-type="pii">nrclinonc.2010.227</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Waitman</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Robins</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Robust clinical marker identification for diabetic kidney disease with ensemble feature selection</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>03</month>
          <day>1</day>
          <volume>26</volume>
          <issue>3</issue>
          <fpage>242</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy165</pub-id>
          <pub-id pub-id-type="medline">30602020</pub-id>
          <pub-id pub-id-type="pii">5271071</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
