<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i8e39057</article-id>
      <article-id pub-id-type="pmid">36040784</article-id>
      <article-id pub-id-type="doi">10.2196/39057</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Standard Vocabularies to Improve Machine Learning Model Transferability With Electronic Health Record Data: Retrospective Cohort Study Using Health Care–Associated Infection</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ren</surname>
            <given-names>Ziyou</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Xu</surname>
            <given-names>Yun</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Kiser</surname>
            <given-names>Amber C</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>School of Medicine</institution>
            <institution>University of Utah</institution>
            <addr-line>421 Wakara Way</addr-line>
            <addr-line>Suite 140</addr-line>
            <addr-line>Salt Lake City, UT, 84108</addr-line>
            <country>United States</country>
            <phone>1 801 581 4080</phone>
            <email>amber.kiser@utah.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3025-3853</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Eilbeck</surname>
            <given-names>Karen</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0831-6427</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ferraro</surname>
            <given-names>Jeffrey P</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4111-2055</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Skarda</surname>
            <given-names>David E</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3362-0666</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Samore</surname>
            <given-names>Matthew H</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4862-9196</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Bucher</surname>
            <given-names>Brian</given-names>
          </name>
          <degrees>MS, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8376-9752</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>School of Medicine</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Medicine</institution>
        <institution>School of Medicine</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Center for Value-Based Surgery</institution>
        <institution>Intermountain Healthcare</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Surgery</institution>
        <institution>School of Medicine</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Informatics, Decision-Enhancement and Analytic Sciences Center 2.0</institution>
        <institution>Veterans Affairs Salt Lake City Health Care System</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Amber C Kiser <email>amber.kiser@utah.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>30</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>8</issue>
      <elocation-id>e39057</elocation-id>
      <history>
        <date date-type="received">
          <day>27</day>
          <month>4</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>9</day>
          <month>8</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>8</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Amber C Kiser, Karen Eilbeck, Jeffrey P Ferraro, David E Skarda, Matthew H Samore, Brian Bucher. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 30.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/8/e39057" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>With the widespread adoption of electronic health records (EHRs) by US hospitals, there is an opportunity to leverage these data for the development of predictive algorithms to improve clinical care. A key barrier in model development and implementation is the external validation of model discrimination, which is rare and often results in worse performance. One reason why machine learning models are not externally generalizable is data heterogeneity. A potential solution to address the substantial data heterogeneity between health care systems is to use standard vocabularies to map EHR data elements. The advantage of these vocabularies is a hierarchical relationship between elements, which allows the aggregation of specific clinical features to more general grouped concepts.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to evaluate grouping EHR data using standard vocabularies to improve the transferability of machine learning models for the detection of postoperative health care–associated infections across institutions with different EHR systems.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Patients who underwent surgery from the University of Utah Health and Intermountain Healthcare from July 2014 to August 2017 with complete follow-up data were included. The primary outcome was a health care–associated infection within 30 days of the procedure. EHR data from 0-30 days after the operation were mapped to standard vocabularies and grouped using the hierarchical relationships of the vocabularies. Model performance was measured using the area under the receiver operating characteristic curve (AUC) and <italic>F</italic><sub>1</sub>-score in internal and external validations. To evaluate model transferability, a difference-in-difference metric was defined as the difference in performance drop between internal and external validations for the baseline and grouped models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 5775 patients from the University of Utah and 15,434 patients from Intermountain Healthcare were included. The prevalence of selected outcomes was from 4.9% (761/15,434) to 5% (291/5775) for surgical site infections, from 0.8% (44/5775) to 1.1% (171/15,434) for pneumonia, from 2.6% (400/15,434) to 3% (175/5775) for sepsis, and from 0.8% (125/15,434) to 0.9% (50/5775) for urinary tract infections. In all outcomes, the grouping of data using standard vocabularies resulted in a reduced drop in AUC and <italic>F</italic><sub>1</sub>-score in external validation compared to baseline features (all <italic>P</italic>&#60;.001, except urinary tract infection AUC: <italic>P</italic>=.002). The difference-in-difference metrics ranged from 0.005 to 0.248 for AUC and from 0.075 to 0.216 for <italic>F</italic><sub>1</sub>-score.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We demonstrated that grouping machine learning model features based on standard vocabularies improved model transferability between data sets across 2 institutions. Improving model transferability using standard vocabularies has the potential to improve the generalization of clinical prediction models across the health care system.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>standard vocabularies</kwd>
        <kwd>machine learning</kwd>
        <kwd>electronic health records</kwd>
        <kwd>model transferability</kwd>
        <kwd>data heterogeneity</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The widespread adoption of electronic health records (EHRs) by US hospitals has created an opportunity to leverage these data for the development of predictive algorithms to improve clinical care [<xref ref-type="bibr" rid="ref1">1</xref>]. Various machine learning (ML) models have been developed to predict a variety of outcomes, including pneumonia, sepsis, and surgical site infection [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. However, relatively few of these models have been implemented into clinical practice [<xref ref-type="bibr" rid="ref6">6</xref>]. A key barrier in model development is the validation of model discrimination across data sets [<xref ref-type="bibr" rid="ref7">7</xref>]. Typically, validation occurs using a blind subset of data from the training data set, termed internal validation. External validation using data from a different institution is rare and often results in worse performance [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>There are many reasons why ML models are not externally generalizable, including inadequate training data, overfitting of the model, and data heterogeneity [<xref ref-type="bibr" rid="ref10">10</xref>]. With 684 different EHR vendors in the United States, the lack of interoperability between institutions, even among those with the same EHR system, substantially inhibits ML model generalizability [<xref ref-type="bibr" rid="ref11">11</xref>]. Various methods have been proposed to improve the generalizability of ML models, including transfer learning, deep learning, and common data models (CDMs) [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. However, data heterogeneity is an underappreciated key determinant of model transferability [<xref ref-type="bibr" rid="ref17">17</xref>]. Data heterogeneity deriving from variation in laboratory practices, hospital medication formularies, and administrative coding practices between health care systems can impact model performance during external validation, resulting in a decreased transferability of models across institutions [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>A solution to address the substantial data heterogeneity between health care systems is to use standard vocabularies to map EHR data elements. These vocabularies, such as the Clinical Classification Software (CCS) for International Classification of Diseases (ICD) Diagnosis Codes, Logical Observation Identifiers Names and Codes (LOINC) for health care observations, and Medi-Span for medications, can be used to support data harmonization between data sets [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. The advantage of these vocabularies is a hierarchical relationship between elements, which allows the aggregation of specific clinical features to more general grouped concepts. For example, <xref rid="figure1" ref-type="fig">Figure 1</xref> demonstrates how multiple ICD diagnosis codes describing “urinary tract infections” can be aggregated to 1 single CCS code. Due to variation in coding practices among health care facilities, the aggregation of concepts may improve ML model transferability during external validation.</p>
      <p>This study’s objective was to evaluate whether aggregating EHR data elements using standard vocabularies would improve ML model transferability to an external data set. Although other works have used this method of grouping EHR data elements when developing ML models, none to our knowledge have assessed the impact of grouping on model transferability to an external data set [<xref ref-type="bibr" rid="ref17">17</xref>]. To evaluate this objective, we classified postoperative health care–associated infections (HAIs) using EHR data from 2 independent health care systems.</p>
      <p>HAIs pose a substantial patient safety concern, raise costs, and increase the risk of death after surgical procedures. HAIs occur in 3% to 27% of surgical patients [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Developing even 1 major postoperative complication increases a patient’s risk of postoperative mortality and readmission [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. To address the challenges of HAIs, hospitals rely on surveillance programs to monitor HAI rates and develop targeted interventions to address postoperative HAIs. Hospitals that participate in quality surveillance programs reduce HAIs over time [<xref ref-type="bibr" rid="ref28">28</xref>]. However, hospital surveillance programs rely on a manual chart review process, which is a critical barrier to the widespread adoption of surgical quality assurance programs. To overcome these difficulties, automated surveillance programs are needed to decrease the burden of the manual chart review process [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. We hypothesized that ML models for HAI detection using grouped features from EHR data would improve model performance during external validation compared to ML models developed using baseline features.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Example of the aggregation of baseline features to grouped concepts. Multiple ICD diagnosis codes describing “urinary tract infections,” including 10 used only in Hospital A, 5 used only in Hospital B, 11 used at both Hospital A and B, and 61 not used in either hospital, can be aggregated to 1 single CCS code. CCS: Clinical Classification Software; ICD: International Classification of Diseases.</p>
        </caption>
        <graphic xlink:href="medinform_v10i8e39057_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Setting</title>
        <p>We performed a retrospective cohort study using data from 2 independent health care systems: the University of Utah Health (Hospital A) with an Epic EHR and Intermountain Healthcare (Hospital B) with a Cerner EHR.</p>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>The institutional review boards at each health care system approved the study (University of Utah Health: 87482; Intermountain Healthcare: 1050851), granting a waiver of informed consent.</p>
      </sec>
      <sec>
        <title>Data Sources, Participants, and Outcomes</title>
        <p>Data for the study were obtained from the American College of Surgeons (ACS) National Surgical Quality Improvement Program (NSQIP) at each institution. The ACS NSQIP program is the largest surgical quality assessment program in the United States, found in over 450 hospitals [<xref ref-type="bibr" rid="ref31">31</xref>]. As part of the program, the surgical clinical reviewers, typically nurses, are trained in NSQIP methodology and definitions [<xref ref-type="bibr" rid="ref32">32</xref>]. NSQIP surgical clinical reviewers manually review the EHR records for all selected operative episodes to identify perioperative complications, including HAI, occurring within 30 days of the operation. All identified complications are rereviewed by the ACS surgeon champion at the participating hospital to ensure that the complications meet the ACS NSQIP definitions. Disagreements are settled when a consensus is reached, with the ACS surgeon acting as adjudicator. The interrater reliability and data quality of the NSQIP program have been previously documented [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
        <p>For this study, patient operative episodes were included if they underwent manual chart review as part of the ACS NSQIP program at each institution. Operative events were excluded if they had incomplete follow-up data.</p>
        <p>The following HAIs were chosen as outcomes due to their prevalence and clinical relevance: surgical site infection (SSI), pneumonia, sepsis, and urinary tract infection (UTI). These outcomes were selected as they are the most common complications occurring after general and thoracic surgical procedures [<xref ref-type="bibr" rid="ref33">33</xref>]. In addition, these complications are the most common underlying cause for hospital readmission after surgical procedures [<xref ref-type="bibr" rid="ref27">27</xref>]. Cases were defined according to standard NSQIP definitions and labeled as binary values for classification.</p>
      </sec>
      <sec>
        <title>EHR Data Element Mapping</title>
        <p>For selected operative events, we obtained all laboratory test results, medication administration, and ICD 9th and 10th editions diagnosis codes from the EHR from 0 to 30 days after surgery. Although diagnosis codes are an important indicator of HAI, they often suffer from low sensitivity [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. We chose to include additional clinical features, including laboratory tests and medications, to increase the sensitivity of our models. Each data category was mapped to a standard vocabulary and grouped based on the hierarchical relationships within the standard vocabularies. The Agency for Healthcare Research and Quality provides a mapping from both ICD-9 and ICD-10 codes to CCS codes in the form of a CSV file [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. Diagnosis codes, represented as ICD codes in the EHR, were manually aggregated into single-level CCS codes using the CCS mapping. Laboratory test results were manually mapped to the LOINC terminology and then aggregated into LOINC groups [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Medications were automatically mapped to the Medi-Span Generic Product Identifier within the EHR [<xref ref-type="bibr" rid="ref23">23</xref>]. In the Medi-Span hierarchy, we categorized the lowest level as baseline and the highest level as grouped. <xref rid="figure2" ref-type="fig">Figure 2</xref> provides examples of aggregation for each data category. Once mapped, we created 2 discrete data sets. The baseline data set consisted of ICD codes, LOINC tests, and Medi-Span drug names. The grouped data set consisted of aggregated features, including CCS codes, LOINC groups, and Medi-Span drug groups.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Example of data aggregation. ICD diagnosis codes were manually aggregated into single-level CCS codes. LOINC observations were aggregated into LOINC groups, consisting of a single possible level. Medi-Span consisted of 5 different possible levels of aggregation. Medi-Span drug names were grouped into the highest level of aggregation—Medi-Span drug groups. CCS: Clinical Classification Software; ICD: International Classification of Diseases; LOINC: Logical Observation Identifiers Names and Codes.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e39057_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Model Development</title>
        <p>To avoid data leakage and overfitting, we divided the data from Hospital A into hyperparameter tuning/training (70%) and internal validation (30%) data sets before preprocessing or model development. For external validation, we used 100% of the data from Hospital B. Missing data were addressed by imputing 0 for nominal variables and the median value—calculated from the training data—for continuous variables [<xref ref-type="bibr" rid="ref36">36</xref>]. Data were standardized to have a mean of 0 and SD of 1. <xref rid="figure3" ref-type="fig">Figure 3</xref> briefly describes the flow of the data through model development, validation, and final evaluation.</p>
        <p>Separate models were developed for each outcome and data set (baseline or grouped). Each model classified whether an operative event resulted in the relevant HAI within 30 days. Important features were identified based on the ANOVA <italic>F</italic>-score. Data sets with different numbers of <italic>n</italic>-top important features were created. In all, 4 ML algorithms were evaluated: random forest, support vector machine, logistic regression, and XGBoost [<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref41">41</xref>]. The number of features and algorithm were included as parameters in model tuning. For each model, tuning was performed using 10-fold cross-validation to improve the internal training. The best model was selected using the area under the receiver operating characteristic curve (AUC) and <italic>F</italic><sub>1</sub>-score [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. The final training of the models was completed using the whole training data set. To address the class imbalance, random undersampling was used during tuning within each fold of cross-validation and during final training [<xref ref-type="bibr" rid="ref44">44</xref>]. We did not perform any balancing during validation as we wanted to test in an environment similar to real-life data where we would expect an imbalance. Model development was completed using Python software (version 3.7; Python Software Foundation) and the <italic>scikit-learn</italic> (version 0.22.1), <italic>imblearn</italic> (version 0.6.2), and <italic>xgboost</italic> (version 1.2.1) packages [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>].</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Flow of data through the study with the derivation for the final difference-in-difference (DiD) metric. Final evaluation steps to calculate the DiD included (1) performance difference between the internal and external validations for the baseline model; (2) performance difference between the internal and external validations for the grouped model; and (3) difference in the performance differences between the baseline and grouped models. AUC: area under the receiver operating characteristic curve.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e39057_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Validation</title>
        <p>For each model, we performed internal and external validations. For each outcome, we calculated the difference-in-difference (DiD) defined in <xref rid="figure3" ref-type="fig">Figure 3</xref>. DiD is a metric previously used in economics to evaluate the difference in means between 2 groups, generally a control group and an intervention group [<xref ref-type="bibr" rid="ref47">47</xref>]. We applied it in our study to assess the difference in performance between the baseline and grouped models. A positive DiD indicates that the model developed using grouped features resulted in a reduced drop in performance during external validation compared to the model developed using baseline features.</p>
      </sec>
      <sec>
        <title>Sensitivity Analyses</title>
        <sec>
          <title>Analysis of Nonshared Codes</title>
          <p>A separate granular data set, including baseline features but restricted to those shared by both hospital systems, was created to investigate the magnitude of performance drop in external validation attributable to nonshared codes. Training and validation were conducted as previously described. We calculated the DiD as described in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        </sec>
        <sec>
          <title>Analysis of Grouping Individual Categories of Data</title>
          <p>We investigated the effect of grouping individual data categories, using only SSI, as this outcome was the most prevalent in the data. Training and validation were conducted as previously described. We compared the baseline model with models developed using data sets created with different combinations of baseline and grouped data. The combination data sets were (1) baseline diagnosis codes and laboratory tests with grouped medications, (2) baseline diagnosis codes and medications with grouped laboratory tests, and (3) grouped diagnosis codes with baseline laboratory tests and medications. We calculated the DiD as described in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        </sec>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>We performed a chi-square test of independence to determine any differences in the prevalence of the outcomes and categorical demographic variables between the institutions. For continuous demographic variables, we performed a 2-tailed, 2-sample <italic>t</italic> test to determine any differences between institutions. To measure model performance, bootstrapping for 1000 iterations was used to measure the mean with 95% CIs [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. A 1-tailed, 1-sample <italic>t</italic> test was used to evaluate whether DiD metrics were significantly greater than 0. All statistical tests were completed using the <italic>SciPy</italic> package in Python [<xref ref-type="bibr" rid="ref50">50</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Cohort and Feature Description</title>
        <p>A total of 5775 operative events were retrieved from Hospital A, whereas a total of 15,434 operative events were retrieved from Hospital B. <xref ref-type="table" rid="table1">Table 1</xref> describes the study demographics.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Study demographics for both internal and external data sets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="0"/>
            <col width="430"/>
            <col width="220"/>
            <col width="230"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristic</td>
                <td>Hospital A (internal; N=5775)</td>
                <td>Hospital B (external; N=15,434)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Age at time of surgery (years), mean (SD)</td>
                <td>52.6 (16.6)</td>
                <td>53.4 (18.1)</td>
                <td>.01</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Gender, male, n (%)</td>
                <td>2765 (47.9)</td>
                <td>7576 (49.1)</td>
                <td>.12</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Race, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">American Indian or Alaska Native</td>
                <td>86 (1.5)</td>
                <td>59 (0.4)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Asian</td>
                <td>81 (1.4)</td>
                <td>192 (1.2)</td>
                <td>.40</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Black or African American</td>
                <td>65 (1.1)</td>
                <td>127 (0.8)</td>
                <td>.05</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Native Hawaiian or Pacific Islander</td>
                <td>34 (0.6)</td>
                <td>147 (1)</td>
                <td>.05</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">White</td>
                <td>5275 (91.3)</td>
                <td>14,216 (92.1)</td>
                <td>.07</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Unknown or not reported</td>
                <td>234 (4.1)</td>
                <td>693 (4.5)</td>
                <td>.18</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Ethnicity, Hispanic, n (%)</td>
                <td>575 (10)</td>
                <td>1384 (9)</td>
                <td>.03</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Procedure Current Procedural Terminology code, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">0-29999 (skin/soft tissue)</td>
                <td>968 (16.8)</td>
                <td>2020 (13.1)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">30000-39999 (cardiovascular)</td>
                <td>594 (10.3)</td>
                <td>2222 (14.4)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">40000-49999 (gastrointestinal)</td>
                <td>4172 (72.2)</td>
                <td>10,796 (69.9)</td>
                <td>.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">50000-59999 (genitourinary)</td>
                <td>27 (0.5)</td>
                <td>99 (0.6)</td>
                <td>.17</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">60000-69999 (nervous system)</td>
                <td>14 (0.2)</td>
                <td>297 (1.9)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Inpatient or outpatient status, inpatient, n (%)</td>
                <td>2831 (49)</td>
                <td>7837 (50.8)</td>
                <td>.02</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Comorbidities, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Diabetes mellitus</td>
                <td>822 (14.2)</td>
                <td>2144 (13.9)</td>
                <td>.54</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Current smoker within 1 year</td>
                <td>799 (13.8)</td>
                <td>2248 (14.6)</td>
                <td>.18</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Dyspnea</td>
                <td>498 (8.6)</td>
                <td>373 (2.4)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Functional health status</td>
                <td>71 (1.2)</td>
                <td>376 (2.4)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Being ventilator-dependent</td>
                <td>20 (0.3)</td>
                <td>149 (1)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>History of severe chronic obstructive pulmonary disease</td>
                <td>128 (2.2)</td>
                <td>417 (2.7)</td>
                <td>.05</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Ascites within 30 days prior to surgery</td>
                <td>8 (0.1)</td>
                <td>114 (0.7)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Congestive heart failure within 30 days prior to surgery</td>
                <td>24 (0.4)</td>
                <td>123 (0.8)</td>
                <td>.004</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Hypertension requiring medication</td>
                <td>1940 (33.6)</td>
                <td>5455 (35.3)</td>
                <td>.02</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Acute renal failure</td>
                <td>9 (0.2)</td>
                <td>53 (0.3)</td>
                <td>.03</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Currently requiring or on dialysis</td>
                <td>100 (1.7)</td>
                <td>283 (1.8)</td>
                <td>.66</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Disseminated cancer</td>
                <td>187 (3.2)</td>
                <td>246 (1.6)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Open wound with or without infection</td>
                <td>287 (5)</td>
                <td>512 (3.3)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Steroid or immunosuppressant use for chronic condition</td>
                <td>351 (6.1)</td>
                <td>644 (4.2)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>&#62;10% loss of body weight in the 6 months prior to surgery</td>
                <td>145 (2.5)</td>
                <td>372 (2.4)</td>
                <td>.71</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Bleeding disorder</td>
                <td>151 (2.6)</td>
                <td>1013 (6.6)</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p><xref ref-type="table" rid="table2">Table 2</xref> describes the prevalence of HAI outcomes within each institution. There were no significant differences in the prevalence of SSI (<italic>P</italic>=.77), sepsis (<italic>P</italic>=.09), or UTI (<italic>P</italic>=.75). The prevalence of pneumonia was significantly higher (<italic>P</italic>=.03) in Hospital B.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Prevalence of selected outcomes in each hospital system.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="280"/>
            <col width="280"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Outcome</td>
                <td>Hospital A (N=5775), n (%)</td>
                <td>Hospital B (N=15,434), n (%)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Surgical site infection</td>
                <td>291 (5)</td>
                <td>761 (4.9)</td>
                <td>.77</td>
              </tr>
              <tr valign="top">
                <td>Pneumonia</td>
                <td>44 (0.8)</td>
                <td>171 (1.1)</td>
                <td>.03<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>Sepsis</td>
                <td>175 (3)</td>
                <td>400 (2.6)</td>
                <td>.09</td>
              </tr>
              <tr valign="top">
                <td>Urinary tract infection</td>
                <td>50 (0.9)</td>
                <td>125 (0.8)</td>
                <td>.75</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Pneumonia was significantly more prevalent in Hospital B (<italic>P</italic>&#60;.05).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Model Development and Validation</title>
        <p>DiD metrics are reported in <xref ref-type="table" rid="table3">Table 3</xref>. Tables S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> detail the selected model parameters. Model calibration can be found in Table S3 and Figures S1-S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Standards for Reporting Diagnostic Accuracy Studies flow diagrams of patient data through the top-performing models can be seen in Figures S5-S16 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>After external validation, all models produced significantly positive AUC and <italic>F</italic><sub>1</sub>-score DiDs when comparing the performance of the baseline and grouped models (all <italic>P</italic>&#60;.001, except UTI AUC: <italic>P</italic>=.002). A forest plot in Figure S17 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> illustrates the AUC and <italic>F</italic><sub>1</sub>-score DiDs.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Difference-in-difference (DiD) metrics for each outcome. Means are based on 1000 bootstrapped iterations with 95% CIs. A positive DiD indicates that the grouped model resulted in a reduced drop in performance compared to the baseline model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Outcome, metric</td>
                <td>Top baseline algorithm</td>
                <td>Top grouped algorithm</td>
                <td>Baseline internal validation, mean (95% CI)</td>
                <td>Baseline external validation, mean (95% CI)</td>
                <td>Grouped internal validation, mean (95% CI)</td>
                <td>Grouped external validation, mean (95% CI)</td>
                <td>DiD, mean (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">
                  <bold>SSI<sup>a</sup></bold>
                </td>
                <td>SVM<sup>b</sup></td>
                <td>LR<sup>c</sup></td>
                <td colspan="6">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AUC<sup>d</sup></td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>0.906 (0.904-0.908)</td>
                <td>0.763 (0.762-0.764)</td>
                <td>0.904 (0.903-0.906)</td>
                <td>0.833 (0.833-0.834)</td>
                <td>0.072 (0.070-0.074)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>F<sub>1</sub></italic>-score</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>0.501 (0.499-0.503)</td>
                <td>0.300 (0.299-0.302)</td>
                <td>0.476 (0.474-0.478)</td>
                <td>0.376 (0.375-0.376)</td>
                <td>0.100 (0.097-0.103)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Pneumonia</bold>
                </td>
                <td>LR</td>
                <td>SVM</td>
                <td colspan="6">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AUC</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>0.953 (0.949-0.957)</td>
                <td>0.683 (0.682-0.685)</td>
                <td>0.994 (0.994-0.995)</td>
                <td>0.973 (0.973-0.974)</td>
                <td>0.250 (0.247-0.252)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>F<sub>1</sub></italic>-score</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>0.504 (0.498-0.509)</td>
                <td>0.302 (0.299-0.305)</td>
                <td>0.456 (0.452-0.461)</td>
                <td>0.467 (0.465-0.468)</td>
                <td>0.212 (0.206-0.218)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Sepsis</bold>
                </td>
                <td>LR</td>
                <td>RF<sup>e</sup></td>
                <td colspan="6">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AUC</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>0.964 (0.963-0.964)</td>
                <td>0.890 (0.889-0.891)</td>
                <td>0.948 (0.946-0.949)</td>
                <td>0.883 (0.883-0.884)</td>
                <td>0.008 (0.007-0.010)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>F<sub>1</sub></italic>-score</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>0.469 (0.467-0.472)</td>
                <td>0.050 (0.050-0.050)</td>
                <td>0.419 (0.416-0.422)</td>
                <td>0.092 (0.092-0.093)</td>
                <td>0.091 (0.089-0.093)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>UTI<sup>f</sup></bold>
                </td>
                <td>SVM</td>
                <td>LR</td>
                <td colspan="6">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AUC</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>0.898 (0.895-0.900)</td>
                <td>0.886 (0.885-0.887)</td>
                <td>0.936 (0.934-0.939)</td>
                <td>0.929 (0.928-0.930)</td>
                <td>0.006 (0.002-0.009)</td>
                <td>.002</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>F<sub>1</sub></italic>-score</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>0.153 (0.148-0.158)</td>
                <td>0.063 (0.061-0.064)</td>
                <td>0.244 (0.241-0.246)</td>
                <td>0.225 (0.224-0.226)</td>
                <td>0.073 (0.068-0.077)</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>SSI: surgical site infection.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>AUC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>RF: random forest.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>UTI: urinary tract infection.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Sensitivity Analyses</title>
        <sec>
          <title>Effect of Nonshared Codes</title>
          <p><xref ref-type="table" rid="table4">Table 4</xref> describes the EHR data elements shared between hospitals. We found that 44.8% (4284/9559) of baseline features present in the training set were not present in the external set, whereas all grouped features present in the training set were present in the external set.</p>
          <p>After external validation, all models, except UTI (<italic>P</italic>=.002), produced significantly positive AUC DiDs (all <italic>P</italic>&#60;.001) when comparing the performance of the baseline and granular models. All outcomes produced significantly positive <italic>F</italic><sub>1</sub>-score DiDs (all <italic>P</italic>&#60;.001) when comparing the performance of the baseline and granular models.</p>
          <p>The magnitudes of the AUC and <italic>F</italic><sub>1</sub>-score DiDs calculated from the comparison of the baseline and grouped models were greater than those calculated from the comparison of the baseline and granular models in all outcomes, except the AUC DiD for sepsis, as represented in <xref ref-type="table" rid="table5">Table 5</xref>. Full internal and external validation results can be found in Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Number of features in each category (diagnosis, medication, and laboratory) for Hospital A, Hospital B, and those shared between them.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="330"/>
              <col width="250"/>
              <col width="250"/>
              <col width="140"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Features</td>
                  <td>Training Set (Hospital A), n</td>
                  <td>External Set (Hospital B), n</td>
                  <td>Shared, n</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Baseline</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Total</td>
                  <td>9559</td>
                  <td>7926</td>
                  <td>5275</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>ICD<sup>a</sup> diagnosis codes</td>
                  <td>7708</td>
                  <td>6859</td>
                  <td>4392</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Medi-Span drug names</td>
                  <td>1311</td>
                  <td>531</td>
                  <td>531</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>LOINC<sup>b</sup> codes</td>
                  <td>540</td>
                  <td>536</td>
                  <td>352</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Grouped</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Total</td>
                  <td>805</td>
                  <td>817</td>
                  <td>805</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>CCS<sup>c</sup> diagnosis codes</td>
                  <td>287</td>
                  <td>287</td>
                  <td>287</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Medi-Span drug groups</td>
                  <td>94</td>
                  <td>94</td>
                  <td>94</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>LOINC groups</td>
                  <td>424</td>
                  <td>436</td>
                  <td>424</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table4fn1">
                <p><sup>a</sup>ICD: International Classification of Diseases.</p>
              </fn>
              <fn id="table4fn2">
                <p><sup>b</sup>LOINC: Logical Observation Identifiers Names and Codes.</p>
              </fn>
              <fn id="table4fn3">
                <p><sup>c</sup>CCS: Clinical Classification Software.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <table-wrap position="float" id="table5">
            <label>Table 5</label>
            <caption>
              <p>Difference-in-difference (DiD) metrics for the comparison between baseline and granular models and the comparison between baseline and grouped models. A positive DiD indicates the comparison model resulted in a reduced drop in performance compared to the baseline model.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="270"/>
              <col width="350"/>
              <col width="350"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Metric, outcome</td>
                  <td>Granular comparison, DiD (95% CI)</td>
                  <td>Grouped comparison, DiD (95% CI)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="4">
                    <bold>AUC<sup>a</sup></bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>SSI<sup>b</sup></td>
                  <td>0.035 (0.033-0.037)</td>
                  <td>0.072 (0.070-0.074)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Pneumonia</td>
                  <td>0.226 (0.223-0.229)</td>
                  <td>0.250 (0.247-0.252)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Sepsis</td>
                  <td>0.015 (0.013-0.017)</td>
                  <td>0.008 (0.007-0.010)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>UTI<sup>c</sup></td>
                  <td>–0.049 (–0.052 to –0.045)</td>
                  <td>0.006 (0.002-0.009)</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold><italic>F<sub>1</sub></italic>-score</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>SSI</td>
                  <td>0.017 (0.014-0.020)</td>
                  <td>0.100 (0.097-0.103)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Pneumonia</td>
                  <td>0.186 (0.179-0.193)</td>
                  <td>0.212 (0.206-0.218)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Sepsis</td>
                  <td>0.026 (0.023-0.028)</td>
                  <td>0.091 (0.089-0.093)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>UTI</td>
                  <td>0.039 (0.035-0.043)</td>
                  <td>0.073 (0.068-0.077)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table5fn1">
                <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
              </fn>
              <fn id="table5fn2">
                <p><sup>b</sup>SSI: surgical site infection.</p>
              </fn>
              <fn id="table5fn3">
                <p><sup>c</sup>UTI: urinary tract infection.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Effect of Grouping Individual Categories of Data</title>
          <p>In the second sensitivity analysis, all AUC and <italic>F</italic><sub>1</sub>-score DiDs were significantly positive (all <italic>P</italic>&#60;.001) when comparing the performance of the baseline and combination models, as displayed in <xref ref-type="table" rid="table6">Table 6</xref>. The combination model with grouped medications, Combination 1, resulted in the greatest AUC DiD. The combination model with grouped diagnosis codes, Combination 3, resulted in the greatest <italic>F</italic><sub>1</sub>-score DiD.</p>
          <table-wrap position="float" id="table6">
            <label>Table 6</label>
            <caption>
              <p>Comparison of models developed from baseline data with models developed from the combination of baseline and grouped data. The difference-in-difference (DiD) reflects the AUC and <italic>F</italic><sub>1</sub>-score for surgical site infection. A positive DiD indicates the combination model resulted in a smaller drop in performance than the baseline model.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="140"/>
              <col width="120"/>
              <col width="130"/>
              <col width="130"/>
              <col width="180"/>
              <col width="180"/>
              <col width="120"/>
              <thead>
                <tr valign="top">
                  <td>Combination</td>
                  <td>Medications</td>
                  <td>Laboratory tests</td>
                  <td>Diagnosis codes</td>
                  <td>AUC<sup>a</sup>, DiD (95% CI)</td>
                  <td><italic>F</italic><sub>1</sub>-score, DiD (95% CI)</td>
                  <td><italic>P</italic> value</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Combination 1</td>
                  <td>Grouped</td>
                  <td>Baseline</td>
                  <td>Baseline</td>
                  <td>0.054 (0.052-0.057)</td>
                  <td>0.072 (0.069-0.074)</td>
                  <td>&#60;.001</td>
                </tr>
                <tr valign="top">
                  <td>Combination 2</td>
                  <td>Baseline</td>
                  <td>Grouped</td>
                  <td>Baseline</td>
                  <td>0.012 (0.010-0.014)</td>
                  <td>0.046 (0.043-0.049)</td>
                  <td>&#60;.001</td>
                </tr>
                <tr valign="top">
                  <td>Combination 3</td>
                  <td>Baseline</td>
                  <td>Baseline</td>
                  <td>Grouped</td>
                  <td>0.049 (0.047-0.051)</td>
                  <td>0.134 (0.131-0.137)</td>
                  <td>&#60;.001</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table6fn1">
                <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>We investigated the effect that grouping EHR data using standard vocabularies has on ML model transferability during external validation. There are several novel and significant findings of our work. First, ML models for HAI detection with grouped features based on standard vocabularies resulted in a reduced drop in performance when validated on an external data set compared to baseline features. Second, there was significant heterogeneity of EHR data elements between health care systems, as 45% of data elements present in the training set were not present in the external set. Third, ML models developed from grouped data sets resulted in greater performance gains after external validation compared to data sets restricted to shared codes alone. Lastly, we found that grouping diagnosis codes and medications was more important to model transferability than grouping laboratory tests.</p>
      <p>We demonstrated that grouping features using standard vocabularies improved model transferability during external validation. We found on average a 51% decrease and 65% decrease in the performance drop of AUC and <italic>F</italic><sub>1</sub>-score, respectively, during external validation when using grouped data compared to baseline data. This improvement in transferability can be attributed to better syntactic and semantic interoperability. Using grouped features allows the model to overcome the challenges of data heterogeneity, such as differences in coding practice and hospital formularies, that arise when using granular codes. A single feature from the grouped model can represent several distinct features from the baseline model (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Hence, this method can generalize to an unknown data set as no knowledge of the future data set is required when selecting features or training the model. Although the practice of grouping features is common, our study is novel in that to our knowledge, previous studies have not evaluated model transferability in an external data set when grouping features based on standard vocabularies.</p>
      <p>The data heterogeneity seen in our data highlights the difficulty when creating generalizable ML models. Shared codes accounted for 57% (4392/7708) of the ICD diagnosis codes used in Hospital A and 64% (4392/6859) of the ICD diagnosis codes used in Hospital B. To our knowledge, no previous studies have compared ICD code usage between hospitals. For several common conditions, there are numerous ICD diagnosis codes available. For example, diabetes mellitus type II has 56 ICD-9 and ICD-10 codes available [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. Variation in coding practices between health care systems can result in several individual codes not being present in a given data set. Differences in laboratory practices or hospital medication formularies may also contribute to EHR data heterogeneity. Extensive feature engineering is typically performed to overcome this challenge before model development [<xref ref-type="bibr" rid="ref53">53</xref>]. Feature engineering, while creating highly relevant features for the given use case, represents a substantial barrier to model generalizability. Our study demonstrated that grouping features can overcome challenges created by data heterogeneity.</p>
      <p>In the first sensitivity analysis, we found that although models developed with granular data sets restricted to shared codes resulted in a reduced drop in performance when compared to a baseline model, models developed from grouped data sets resulted in an even smaller drop in performance. The models developed using grouped data sets resulted in an additional 41% decrease and 70% decrease in performance drop of AUC and <italic>F</italic><sub>1</sub>-score, respectively, during external validation on average. These results provide further evidence that grouping features using standard vocabularies produces greater benefits than just restricting features to those shared by other hospital systems.</p>
      <p>In the second sensitivity analysis, we found that the most important factors when improving transferability included grouping both diagnosis codes and medications. This result could be explained by the amount of information lost due to variation in coding practices and prescription preferences when using baseline data. Rasmy et al [<xref ref-type="bibr" rid="ref54">54</xref>] compared models using different representations of diagnosis codes in the EHR. The study found that models developed with data mapped to the Unified Medical Language System (UMLS) produced the highest AUC, whereas models developed with data mapped to CCS codes produced the lowest AUC. However, this previous study did not have an external data set to compare performance.</p>
      <p>Other studies have used various methods to improve model transferability, including transfer learning, deep learning, and anchor learning [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. Curth et al [<xref ref-type="bibr" rid="ref12">12</xref>] found that using transfer learning significantly increased model performance, where the AUC increased as much as from 4.7% to 7.0% depending on the use case. Although transfer learning has been shown to be successful, it requires models to be trained with data from the internal and external sites. Rasmy et al [<xref ref-type="bibr" rid="ref15">15</xref>] found an average drop of 3.6% in AUC when evaluating the generalizability of a recurrent neural network. In our study, we found the average drop in AUC to be 13% in models developed using baseline data but only 4% in models developed using grouped data. Kashyap et al [<xref ref-type="bibr" rid="ref13">13</xref>] found performance drops in both recall and precision when validating the model at an external site after using anchor learning. Our study evaluated a method to achieve comparable model transferability without requiring any knowledge of the external site or a deep learning model.</p>
      <p>Mapping data to CDMs can facilitate the sharing of data and models across institutions as seen in several recent studies [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. Recent work, such as that from Tian et al [<xref ref-type="bibr" rid="ref9">9</xref>], has built frameworks for model sharing and generalizability that use CDMs in their pipeline [<xref ref-type="bibr" rid="ref17">17</xref>]. The use of a CDM involves mapping data to standard vocabularies as we did in our study, which addresses the problem of syntax by standardizing the vocabulary. In our study, we further address the problem of semantics, where different hospitals may use the same vocabulary, but coding practices may result in different codes representing the same condition.</p>
      <p>We acknowledge several limitations to this study. Our use case consisted of HAI detection in patients who underwent surgery. The benefit of grouping feature sets for ML development may not be consistent across other use cases. We only used EHR data elements for which there are standard vocabularies available, excluding features such as microbiology reports or clinical text. It is likely that including these additional features would improve ML model performance at the expense of requiring an extensive amount of feature engineering. We used Medi-Span, a proprietary vocabulary, as both hospital EHRs mapped medications to this system. Other vocabularies, such as RxNorm, could be used. There are several different terminologies that can be used to group diagnosis codes in addition to CCS, including UMLS, as was studied by Rasmy et al [<xref ref-type="bibr" rid="ref54">54</xref>]. Their work indicates that using UMLS to group diagnosis codes could produce an even smaller drop in performance than we found with CCS. This method would be a valuable investigation for future studies that could lead to even greater results. The terminologies and levels chosen for our study could be modified for different use cases.</p>
      <p>This study has substantial implications for the application of ML models to clinical practice. Significant improvements in patient care can be achieved with ML models as demonstrated in previous studies [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]. However, external validation remains one of the most serious barriers to the widespread use of ML models in clinical practice [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. We found that 2 independent hospitals only shared 55% of baseline EHR data elements, highlighting the difficulty when creating generalizable ML models. Current practices to overcome the data heterogeneity between data sets involve extensive feature engineering, which is burdensome during model deployment at a new health care system where EHR data elements are not mapped to a CDM [<xref ref-type="bibr" rid="ref59">59</xref>]. We demonstrated the novel finding that grouping features with standard vocabularies can overcome the challenge of data heterogeneity and improve ML model performance in external data sets. The method of grouping features based on standard vocabularies will improve the transferability of models, allowing for more widespread use of these ML models between health care systems.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplemental tables and figures.</p>
        <media xlink:href="medinform_v10i8e39057_app1.docx" xlink:title="DOCX File , 1175 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACS</term>
          <def>
            <p>American College of Surgeons</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CCS</term>
          <def>
            <p>Clinical Classification Software</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CDM</term>
          <def>
            <p>common data model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DiD</term>
          <def>
            <p>difference-in-difference</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">HAI</term>
          <def>
            <p>health care–associated infection</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LOINC</term>
          <def>
            <p>Logical Observation Identifiers Names and Codes</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NSQIP</term>
          <def>
            <p>National Surgical Quality Improvement Program</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SSI</term>
          <def>
            <p>surgical site infection</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">UTI</term>
          <def>
            <p>urinary tract infection</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by a training grant (T15LM007124) from the National Library of Medicine (ACK) and a grant (1K08HS025776) from the Agency for Healthcare Research and Quality (BB). The computational resources used were partially funded by the National Institutes of Health Shared Instrumentation (grant 1S10OD021644-01A1). The National Institutes of Health and the Agency for Healthcare Research and Quality had no role in the design and conduct of the study; collection, management, analysis, and interpretation of the data; preparation, review, or approval of the manuscript; and decision to submit the manuscript for publication.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The code for this article can be found in the public GitHub repository amberkiser/MachineLearningTransferability. The data underlying this article cannot be shared publicly as it contains protected health information.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>ACK contributed to methodology; writing the code and performing the analysis (software); formal analysis; and writing—original draft, review, and editing. KE contributed to writing—original draft, review, and editing—and supervision. JPF provided resources and contributed to data curation and writing—review and editing. DES provided resources and contributed to data curation and writing—review and editing. MHS provided resources and contributed to data curation and writing—review and editing. BB contributed to conceptualization; methodology; writing—original draft, review, and editing; and supervision.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Danciu</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cowan</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Basford</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Saip</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Osgood</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shirey-Rice</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kirby</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Secondary use of clinical data: the Vanderbilt approach</article-title>
          <source>J Biomed Inform</source>
          <year>2014</year>
          <month>12</month>
          <volume>52</volume>
          <fpage>28</fpage>
          <lpage>35</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00039-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.02.003</pub-id>
          <pub-id pub-id-type="medline">24534443</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00039-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4133331</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meystre</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gouripeddi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tieder</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Simmons</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Enhancing comparative effectiveness research with automated pediatric pneumonia detection in a multi-institutional clinical repository: a PHIS+ pilot study</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>05</month>
          <day>15</day>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>e162</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2017/5/e162/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.6887</pub-id>
          <pub-id pub-id-type="medline">28506958</pub-id>
          <pub-id pub-id-type="pii">v19i5e162</pub-id>
          <pub-id pub-id-type="pmcid">PMC5447826</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yi</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Predicting post-stroke pneumonia using deep neural network approaches</article-title>
          <source>Int J Med Inform</source>
          <year>2019</year>
          <month>12</month>
          <volume>132</volume>
          <fpage>103986</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(18)31272-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2019.103986</pub-id>
          <pub-id pub-id-type="medline">31629312</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(18)31272-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Desautels</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Calvert</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jay</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kerem</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shieh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shimabukuro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chettipally</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Barton</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wales</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Prediction of sepsis in the intensive care unit with minimal electronic health record data: a machine learning approach</article-title>
          <source>JMIR Med Inform</source>
          <year>2016</year>
          <month>09</month>
          <day>30</day>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>e28</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2016/3/e28/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.5909</pub-id>
          <pub-id pub-id-type="medline">27694098</pub-id>
          <pub-id pub-id-type="pii">v4i3e28</pub-id>
          <pub-id pub-id-type="pmcid">PMC5065680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence-based multimodal risk assessment model for surgical site infection (AMRAMS): development and validation study</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>06</month>
          <day>15</day>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>e18186</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/6/e18186/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18186</pub-id>
          <pub-id pub-id-type="medline">32538798</pub-id>
          <pub-id pub-id-type="pii">v8i6e18186</pub-id>
          <pub-id pub-id-type="pmcid">PMC7325005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siontis</surname>
              <given-names>GCM</given-names>
            </name>
            <name name-style="western">
              <surname>Tzoulaki</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Castaldi</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannidis</surname>
              <given-names>JPA</given-names>
            </name>
          </person-group>
          <article-title>External validation of new risk prediction models is infrequent and reveals worse prognostic discrimination</article-title>
          <source>J Clin Epidemiol</source>
          <year>2015</year>
          <month>01</month>
          <volume>68</volume>
          <issue>1</issue>
          <fpage>25</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2014.09.007</pub-id>
          <pub-id pub-id-type="medline">25441703</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(14)00353-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Kent</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Big data and predictive analytics: recalibrating expectations</article-title>
          <source>JAMA</source>
          <year>2018</year>
          <month>07</month>
          <day>03</day>
          <volume>320</volume>
          <issue>1</issue>
          <fpage>27</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2018.5602</pub-id>
          <pub-id pub-id-type="medline">29813156</pub-id>
          <pub-id pub-id-type="pii">2683125</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Navar</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Pencina</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannidis</surname>
              <given-names>JPA</given-names>
            </name>
          </person-group>
          <article-title>Opportunities and challenges in developing risk prediction models with electronic health records data: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>01</month>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>198</fpage>
          <lpage>208</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27189013"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw042</pub-id>
          <pub-id pub-id-type="medline">27189013</pub-id>
          <pub-id pub-id-type="pii">ocw042</pub-id>
          <pub-id pub-id-type="pmcid">PMC5201180</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Establishment and evaluation of a multicenter collaborative prediction model construction framework supporting model generalization and continuous improvement: a pilot study</article-title>
          <source>Int J Med Inform</source>
          <year>2020</year>
          <month>09</month>
          <volume>141</volume>
          <fpage>104173</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2020.104173</pub-id>
          <pub-id pub-id-type="medline">32531725</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(20)30136-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Harrell</surname>
              <given-names>FE</given-names>
            </name>
          </person-group>
          <article-title>Prediction models need appropriate internal, internal-external, and external validation</article-title>
          <source>J Clin Epidemiol</source>
          <year>2016</year>
          <month>01</month>
          <volume>69</volume>
          <fpage>245</fpage>
          <lpage>247</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25981519"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2015.04.005</pub-id>
          <pub-id pub-id-type="medline">25981519</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(15)00175-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC5578404</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Office of the National Coordinator for Health Information Technology</collab>
          </person-group>
          <article-title>Certified health IT developers and editions reported by health care professionals participating in the Medicare EHR Incentive Program</article-title>
          <source>HealthIT</source>
          <year>2017</year>
          <month>07</month>
          <access-date>2021-12-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthit.gov/data/quickstats/health-care-professional-health-it-developers">https://www.healthit.gov/data/quickstats/health-care-professional-health-it-developers</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Curth</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thoral</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>van den Wildenberg</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bijlstra</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>de Bruin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Elbers</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fornasa</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Transferring clinical prediction models across hospitals and electronic health record systems</article-title>
          <year>2020</year>
          <month>03</month>
          <day>28</day>
          <conf-name>ECML PKDD 2019: Machine Learning and Knowledge Discovery in Databases</conf-name>
          <conf-date>September 16-20, 2019</conf-date>
          <conf-loc>Würzburg, Germany</conf-loc>
          <fpage>605</fpage>
          <lpage>621</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-43823-4_48</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kashyap</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Seneviratne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Banda</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Falconer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of phenotype classifiers across multiple sites in the observational health data sciences and informatics network</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>06</month>
          <day>01</day>
          <volume>27</volume>
          <issue>6</issue>
          <fpage>877</fpage>
          <lpage>883</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32374408"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa032</pub-id>
          <pub-id pub-id-type="medline">32374408</pub-id>
          <pub-id pub-id-type="pii">5831103</pub-id>
          <pub-id pub-id-type="pmcid">PMC7309227</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassanzadeh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Karimi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Transferability of artificial neural networks for clinical document classification across hospitals: a case study on abnormality detection from radiology reports</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>09</month>
          <volume>85</volume>
          <fpage>68</fpage>
          <lpage>79</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30143-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.07.017</pub-id>
          <pub-id pub-id-type="medline">30026067</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30143-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rasmy</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Geng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhi</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A study of generalizability of recurrent neural network-based predictive models for heart failure onset risk using a large and heterogeneous EHR data set</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>08</month>
          <volume>84</volume>
          <fpage>11</fpage>
          <lpage>16</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30117-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.06.011</pub-id>
          <pub-id pub-id-type="medline">29908902</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30117-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6076336</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised learning to improve generalizability of risk prediction models</article-title>
          <source>J Biomed Inform</source>
          <year>2019</year>
          <month>04</month>
          <volume>92</volume>
          <fpage>103117</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(19)30035-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2019.103117</pub-id>
          <pub-id pub-id-type="medline">30738948</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(19)30035-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reps</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Suchard</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Rijnbeek</surname>
              <given-names>PR</given-names>
            </name>
          </person-group>
          <article-title>Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>08</month>
          <day>01</day>
          <volume>25</volume>
          <issue>8</issue>
          <fpage>969</fpage>
          <lpage>975</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29718407"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy032</pub-id>
          <pub-id pub-id-type="medline">29718407</pub-id>
          <pub-id pub-id-type="pii">4989437</pub-id>
          <pub-id pub-id-type="pmcid">PMC6077830</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramspek</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Jager</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dekker</surname>
              <given-names>FW</given-names>
            </name>
            <name name-style="western">
              <surname>Zoccali</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>van Diepen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>External validation of prognostic models: what, why, how, when and where?</article-title>
          <source>Clin Kidney J</source>
          <year>2021</year>
          <month>01</month>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>49</fpage>
          <lpage>58</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/ckj/article-lookup/doi/10.1093/ckj/sfaa188"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ckj/sfaa188</pub-id>
          <pub-id pub-id-type="medline">33564405</pub-id>
          <pub-id pub-id-type="pii">sfaa188</pub-id>
          <pub-id pub-id-type="pmcid">PMC7857818</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <article-title>Clinical Classifications Software (CCS) for ICD-9-CM</article-title>
          <source>Healthcare Cost and Utilization Project</source>
          <year>2017</year>
          <month>03</month>
          <access-date>2020-06-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hcup-us.ahrq.gov/toolssoftware/ccs/ccs.jsp">https://www.hcup-us.ahrq.gov/toolssoftware/ccs/ccs.jsp</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>Clinical Classifications Software Refined (CCSR) for ICD-10-CM Diagnoses</article-title>
          <source>Healthcare Cost and Utilization Project</source>
          <year>2022</year>
          <month>02</month>
          <access-date>2022-07-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hcup-us.ahrq.gov/toolssoftware/ccsr/dxccsr.jsp">https://www.hcup-us.ahrq.gov/toolssoftware/ccsr/dxccsr.jsp</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <article-title>LOINC from Regenstrief</article-title>
          <source>LOINC</source>
          <access-date>2020-06-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://loinc.org/">https://loinc.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forrey</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>DeMoor</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Huff</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Leavelle</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Leland</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Fiers</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Charles</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Stalling</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Tullis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hutchins</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Baenziger</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Logical observation identifier names and codes (LOINC) database: a public use set of codes and names for electronic reporting of clinical laboratory test results</article-title>
          <source>Clin Chem</source>
          <year>1996</year>
          <month>01</month>
          <volume>42</volume>
          <issue>1</issue>
          <fpage>81</fpage>
          <lpage>90</lpage>
          <pub-id pub-id-type="medline">8565239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <article-title>Medi-Span: power up your drug decisions with smart data</article-title>
          <source>Wolters Kluwer</source>
          <access-date>2020-06-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.wolterskluwer.com/en/solutions/medi-span">https://www.wolterskluwer.com/en/solutions/medi-span</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bucher</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Ferraro</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Finlayson</surname>
              <given-names>SRG</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Gundlapalli</surname>
              <given-names>AV</given-names>
            </name>
          </person-group>
          <article-title>Use of computerized provider order entry events for postoperative complication surveillance</article-title>
          <source>JAMA Surg</source>
          <year>2019</year>
          <month>04</month>
          <day>01</day>
          <volume>154</volume>
          <issue>4</issue>
          <fpage>311</fpage>
          <lpage>318</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30586132"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamasurg.2018.4874</pub-id>
          <pub-id pub-id-type="medline">30586132</pub-id>
          <pub-id pub-id-type="pii">2719461</pub-id>
          <pub-id pub-id-type="pmcid">PMC6484794</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Vries</surname>
              <given-names>EN</given-names>
            </name>
            <name name-style="western">
              <surname>Prins</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Crolla</surname>
              <given-names>RMPH</given-names>
            </name>
            <name name-style="western">
              <surname>den Outer</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>van Andel</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>van Helden</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Schlack</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>van Putten</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Gouma</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dijkgraaf</surname>
              <given-names>MGW</given-names>
            </name>
            <name name-style="western">
              <surname>Smorenburg</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Boermeester</surname>
              <given-names>MA</given-names>
            </name>
            <collab>SURPASS Collaborative Group</collab>
          </person-group>
          <article-title>Effect of a comprehensive surgical safety system on patient outcomes</article-title>
          <source>N Engl J Med</source>
          <year>2010</year>
          <month>11</month>
          <day>11</day>
          <volume>363</volume>
          <issue>20</issue>
          <fpage>1928</fpage>
          <lpage>1937</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMsa0911535</pub-id>
          <pub-id pub-id-type="medline">21067384</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghaferi</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Birkmeyer</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Dimick</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Variation in hospital mortality associated with inpatient surgery</article-title>
          <source>N Engl J Med</source>
          <year>2009</year>
          <month>10</month>
          <day>01</day>
          <volume>361</volume>
          <issue>14</issue>
          <fpage>1368</fpage>
          <lpage>1375</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMsa0903048</pub-id>
          <pub-id pub-id-type="medline">19797283</pub-id>
          <pub-id pub-id-type="pii">361/14/1368</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Merkow</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Ju</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>MV</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Bilimoria</surname>
              <given-names>KY</given-names>
            </name>
          </person-group>
          <article-title>Underlying reasons associated with hospital readmission following surgery in the United States</article-title>
          <source>JAMA</source>
          <year>2015</year>
          <month>02</month>
          <day>03</day>
          <volume>313</volume>
          <issue>5</issue>
          <fpage>483</fpage>
          <lpage>495</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2014.18614</pub-id>
          <pub-id pub-id-type="medline">25647204</pub-id>
          <pub-id pub-id-type="pii">2107788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bilimoria</surname>
              <given-names>KY</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>CY</given-names>
            </name>
          </person-group>
          <article-title>Does surgical quality improve in the American College of Surgeons National Surgical Quality Improvement Program: an evaluation of all participating hospitals</article-title>
          <source>Ann Surg</source>
          <year>2009</year>
          <month>09</month>
          <volume>250</volume>
          <issue>3</issue>
          <fpage>363</fpage>
          <lpage>376</lpage>
          <pub-id pub-id-type="doi">10.1097/SLA.0b013e3181b4148f</pub-id>
          <pub-id pub-id-type="medline">19644350</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pruitt</surname>
              <given-names>LCC</given-names>
            </name>
            <name name-style="western">
              <surname>Luppens</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Ferraro</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Gundlapalli</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Bucher</surname>
              <given-names>BT</given-names>
            </name>
          </person-group>
          <article-title>Using natural language processing to improve EHR structured data-based surgical site infection surveillance</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2019</year>
          <volume>2019</volume>
          <fpage>794</fpage>
          <lpage>803</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32308875"/>
          </comment>
          <pub-id pub-id-type="medline">32308875</pub-id>
          <pub-id pub-id-type="pmcid">PMC7153106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wick</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Abe-Jones</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Najafi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sheka</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tourani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Skube</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
          </person-group>
          <article-title>Applying machine learning across sites: external validation of a surgical site infection detection algorithm</article-title>
          <source>J Am Coll Surg</source>
          <year>2021</year>
          <month>06</month>
          <volume>232</volume>
          <issue>6</issue>
          <fpage>963</fpage>
          <lpage>971.e1</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33831539"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jamcollsurg.2021.03.026</pub-id>
          <pub-id pub-id-type="medline">33831539</pub-id>
          <pub-id pub-id-type="pii">S1072-7515(21)00242-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC8679130</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Hart</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Hoyt</surname>
              <given-names>DB</given-names>
            </name>
          </person-group>
          <article-title>The American College of Surgeons National Surgical Quality Improvement Program: achieving better and safer surgery</article-title>
          <source>Jt Comm J Qual Patient Saf</source>
          <year>2015</year>
          <month>05</month>
          <volume>41</volume>
          <issue>5</issue>
          <fpage>199</fpage>
          <lpage>204</lpage>
          <pub-id pub-id-type="doi">10.1016/s1553-7250(15)41026-8</pub-id>
          <pub-id pub-id-type="medline">25977246</pub-id>
          <pub-id pub-id-type="pii">S1553-7250(15)41026-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shiloach</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Frencher</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Steeger</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Rowell</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Bartzokis</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tomeh</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>BL</given-names>
            </name>
          </person-group>
          <article-title>Toward robust information: data quality and inter-rater reliability in the American College of Surgeons National Surgical Quality Improvement Program</article-title>
          <source>J Am Coll Surg</source>
          <year>2010</year>
          <month>01</month>
          <volume>210</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jamcollsurg.2009.09.031</pub-id>
          <pub-id pub-id-type="medline">20123325</pub-id>
          <pub-id pub-id-type="pii">S1072-7515(09)01408-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dencker</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Bonde</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Troelsen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Varadarajan</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Sillesen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Postoperative complications: an observational study of trends in the United States from 2012 to 2018</article-title>
          <source>BMC Surg</source>
          <year>2021</year>
          <month>11</month>
          <day>06</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>393</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcsurg.biomedcentral.com/articles/10.1186/s12893-021-01392-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12893-021-01392-z</pub-id>
          <pub-id pub-id-type="medline">34740362</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12893-021-01392-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC8571843</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Mourik</surname>
              <given-names>MSM</given-names>
            </name>
            <name name-style="western">
              <surname>van Duijn</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>KGM</given-names>
            </name>
            <name name-style="western">
              <surname>Bonten</surname>
              <given-names>MJM</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>GM</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of administrative data for surveillance of healthcare-associated infections: a systematic review</article-title>
          <source>BMJ Open</source>
          <year>2015</year>
          <month>08</month>
          <day>27</day>
          <volume>5</volume>
          <issue>8</issue>
          <fpage>e008424</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=26316651"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2015-008424</pub-id>
          <pub-id pub-id-type="medline">26316651</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2015-008424</pub-id>
          <pub-id pub-id-type="pmcid">PMC4554897</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Redondo-González</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Tenías</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Arias</surname>
              <given-names>Á</given-names>
            </name>
            <name name-style="western">
              <surname>Lucendo</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Validity and reliability of administrative coded data for the identification of hospital-acquired infections: an updated systematic review with meta-analysis and meta-regression analysis</article-title>
          <source>Health Serv Res</source>
          <year>2018</year>
          <month>06</month>
          <volume>53</volume>
          <issue>3</issue>
          <fpage>1919</fpage>
          <lpage>1956</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28397261"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/1475-6773.12691</pub-id>
          <pub-id pub-id-type="medline">28397261</pub-id>
          <pub-id pub-id-type="pmcid">PMC5980352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Arsoniadis</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kwaan</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>GJ</given-names>
            </name>
          </person-group>
          <article-title>Strategies for handling missing clinical data for automated surgical site infection detection from the electronic health record</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>04</month>
          <volume>68</volume>
          <fpage>112</fpage>
          <lpage>120</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30058-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.03.009</pub-id>
          <pub-id pub-id-type="medline">28323112</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30058-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5474942</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <month>10</month>
          <volume>45</volume>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>LIBSVM: a library for support vector machines</article-title>
          <source>ACM Trans Intell Syst Technol</source>
          <year>2011</year>
          <month>04</month>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1145/1961189.1961199</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Platt</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods</article-title>
          <source>Advances in large margin classifiers</source>
          <year>1999</year>
          <month>03</month>
          <day>26</day>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>61</fpage>
          <lpage>74</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://home.cs.colorado.edu/~mozer/Teaching/syllabi/6622/papers/Platt1999.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stoltzfus</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Logistic regression: a brief primer</article-title>
          <source>Acad Emerg Med</source>
          <year>2011</year>
          <month>10</month>
          <volume>18</volume>
          <issue>10</issue>
          <fpage>1099</fpage>
          <lpage>1104</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1111/j.1553-2712.2011.01185.x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1553-2712.2011.01185.x</pub-id>
          <pub-id pub-id-type="medline">21996075</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: a scalable tree boosting system</article-title>
          <year>2016</year>
          <month>08</month>
          <day>13</day>
          <conf-name>KDD '16: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 13-17, 2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <fpage>785</fpage>
          <lpage>794</lpage>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hanley</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>McNeil</surname>
              <given-names>BJ</given-names>
            </name>
          </person-group>
          <article-title>The meaning and use of the area under a receiver operating characteristic (ROC) curve</article-title>
          <source>Radiology</source>
          <year>1982</year>
          <month>04</month>
          <volume>143</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1148/radiology.143.1.7063747</pub-id>
          <pub-id pub-id-type="medline">7063747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goutte</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gaussier</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A probabilistic interpretation of precision, recall and F-score, with implication for evaluation</article-title>
          <year>2005</year>
          <conf-name>ECIR 2005: Advances in Information Retrieval</conf-name>
          <conf-date>March 21-23, 2005</conf-date>
          <conf-loc>Santiago de Compostela, Spain</conf-loc>
          <fpage>345</fpage>
          <lpage>359</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-540-31865-1_25</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hasanin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Khoshgoftaar</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The effects of random undersampling with simulated class imbalance for big data</article-title>
          <year>2018</year>
          <month>08</month>
          <day>06</day>
          <conf-name>2018 IEEE International Conference on Information Reuse and Integration (IRI)</conf-name>
          <conf-date>July 6-9, 2018</conf-date>
          <conf-loc>Salt Lake City, UT</conf-loc>
          <fpage>6</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1109/iri.2018.00018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lemaître</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Aridas</surname>
              <given-names>CK</given-names>
            </name>
          </person-group>
          <article-title>Imbalanced-learn: a python toolbox to tackle the curse of imbalanced datasets in machine learning</article-title>
          <source>J Mach Learn Res</source>
          <year>2017</year>
          <month>01</month>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>559</fpage>
          <lpage>563</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3122009.3122026"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>É</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <month>10</month>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwerdt</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Woessmann</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Bradley</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Chapter 1 - Empirical methods in the economics of education</article-title>
          <source>The Economics of Education. 2nd ed</source>
          <year>2020</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>Academic Press</publisher-name>
          <fpage>3</fpage>
          <lpage>20</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Margolis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bilker</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Boston</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Localio</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Berlin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Statistical characteristics of area under the receiver operating characteristic curve for a simple prognostic model using traditional and bootstrapped approaches</article-title>
          <source>J Clin Epidemiol</source>
          <year>2002</year>
          <month>05</month>
          <volume>55</volume>
          <issue>5</issue>
          <fpage>518</fpage>
          <lpage>524</lpage>
          <pub-id pub-id-type="doi">10.1016/s0895-4356(01)00512-1</pub-id>
          <pub-id pub-id-type="medline">12007556</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Goodacre</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>On splitting training and validation set: a comparative study of cross-validation, bootstrap and systematic sampling for estimating the generalization performance of supervised learning</article-title>
          <source>J Anal Test</source>
          <year>2018</year>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>249</fpage>
          <lpage>262</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30842888"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s41664-018-0068-2</pub-id>
          <pub-id pub-id-type="medline">30842888</pub-id>
          <pub-id pub-id-type="pii">68</pub-id>
          <pub-id pub-id-type="pmcid">PMC6373628</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Virtanen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gommers</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Oliphant</surname>
              <given-names>TE</given-names>
            </name>
            <name name-style="western">
              <surname>Haberland</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Burovski</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weckesser</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bright</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van der Walt</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brett</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Millman</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mayorov</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>ARJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Polat</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>VanderPlas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laxalde</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Perktold</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cimrman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Henriksen</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Quintero</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Archibald</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeiro</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>van Mulbregt</surname>
              <given-names>P</given-names>
            </name>
            <collab>SciPy 1.0 Contributors</collab>
          </person-group>
          <article-title>SciPy 1.0: fundamental algorithms for scientific computing in Python</article-title>
          <source>Nat Methods</source>
          <year>2020</year>
          <month>03</month>
          <day>03</day>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>261</fpage>
          <lpage>272</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32015543"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41592-019-0686-2</pub-id>
          <pub-id pub-id-type="medline">32015543</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41592-019-0686-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7056644</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <article-title>2014 ICD-9-CM Diagnosis Codes: diabetes mellitus 250</article-title>
          <source>ICD9Data</source>
          <access-date>2020-06-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.icd9data.com/2014/Volume1/240-279/249-259/250/default.htm">http://www.icd9data.com/2014/Volume1/240-279/249-259/250/default.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <article-title>ICD-10-CM Section E08-E13: diabetes mellitus</article-title>
          <source>ICD.Codes</source>
          <access-date>2020-06-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://icd.codes/icd10cm/chapter4/E08-E13">https://icd.codes/icd10cm/chapter4/E08-E13</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Romero-Brufau</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Whitford</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Hickman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Morlan</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Therneau</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Naessens</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huddleston</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Using machine learning to improve the accuracy of patient deterioration predictions: Mayo Clinic Early Warning Score (MC-EWS)</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>06</month>
          <day>12</day>
          <volume>28</volume>
          <issue>6</issue>
          <fpage>1207</fpage>
          <lpage>1215</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33638343"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa347</pub-id>
          <pub-id pub-id-type="medline">33638343</pub-id>
          <pub-id pub-id-type="pii">6151565</pub-id>
          <pub-id pub-id-type="pmcid">PMC8661441</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rasmy</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tiryaki</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhi</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Representation of EHR data for predictive modeling: a comparison between UMLS and other terminologies</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>10</month>
          <day>01</day>
          <volume>27</volume>
          <issue>10</issue>
          <fpage>1593</fpage>
          <lpage>1599</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32930711"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa180</pub-id>
          <pub-id pub-id-type="medline">32930711</pub-id>
          <pub-id pub-id-type="pii">5905876</pub-id>
          <pub-id pub-id-type="pmcid">PMC7647355</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kostka</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Posada</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Seo</surname>
              <given-names>SI</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>DY</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Roh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chae</surname>
              <given-names>SG</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Son</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rijnbeek</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Prediction of major depressive disorder following beta-blocker therapy in patients with cardiovascular diseases</article-title>
          <source>J Pers Med</source>
          <year>2020</year>
          <month>12</month>
          <day>18</day>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>288</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jpm10040288"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jpm10040288</pub-id>
          <pub-id pub-id-type="medline">33352870</pub-id>
          <pub-id pub-id-type="pii">jpm10040288</pub-id>
          <pub-id pub-id-type="pmcid">PMC7766565</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Barton</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fitzgerald</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pellegrini</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Calvert</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Pediatric severe sepsis prediction using machine learning</article-title>
          <source>Front Pediatr</source>
          <year>2019</year>
          <volume>7</volume>
          <fpage>413</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fped.2019.00413"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fped.2019.00413</pub-id>
          <pub-id pub-id-type="medline">31681711</pub-id>
          <pub-id pub-id-type="pmcid">PMC6798083</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lindberg</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Prosperi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bjarnadottir</surname>
              <given-names>RI</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Crane</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shear</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Solberg</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Snigurska</surname>
              <given-names>UA</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lucero</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Identification of important factors in an inpatient fall risk prediction model to improve the quality of care using EHR and electronic administrative data: a machine-learning approach</article-title>
          <source>Int J Med Inform</source>
          <year>2020</year>
          <month>11</month>
          <volume>143</volume>
          <fpage>104272</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(20)30702-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2020.104272</pub-id>
          <pub-id pub-id-type="medline">32980667</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(20)30702-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC8562928</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Clinical text data in machine learning: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>03</month>
          <day>31</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>e17984</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/3/e17984/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17984</pub-id>
          <pub-id pub-id-type="medline">32229465</pub-id>
          <pub-id pub-id-type="pii">v8i3e17984</pub-id>
          <pub-id pub-id-type="pmcid">PMC7157505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Del Fiol</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tenenbaum</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Walden</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zozus</surname>
              <given-names>MN</given-names>
            </name>
          </person-group>
          <article-title>Evaluating common data models for use with a longitudinal community registry</article-title>
          <source>J Biomed Inform</source>
          <year>2016</year>
          <month>12</month>
          <volume>64</volume>
          <fpage>333</fpage>
          <lpage>341</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(16)30153-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2016.10.016</pub-id>
          <pub-id pub-id-type="medline">27989817</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(16)30153-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6810649</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
