<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i7e16129</article-id>
      <article-id pub-id-type="pmid">32479414</article-id>
      <article-id pub-id-type="doi">10.2196/16129</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Precision Health–Enabled Machine Learning to Identify Need for Wraparound Social Services Using Patient- and Population-Level Data Sets: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Davison</surname>
            <given-names>Karen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>ten Klooster</surname>
            <given-names>Iris</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sheon</surname>
            <given-names>Amy</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mandl</surname>
            <given-names>Ken</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Kasthurirathne</surname>
            <given-names>Suranga N</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Center for Biomedical Informatics</institution>
            <institution>Regenstrief Institute</institution>
            <addr-line>1101 W 10th Street</addr-line>
            <addr-line>Indianapolis, IN, 46202</addr-line>
            <country>United States</country>
            <phone>1 3172749000</phone>
            <email>snkasthu@iu.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6630-4598</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Grannis</surname>
            <given-names>Shaun</given-names>
          </name>
          <degrees>MS, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8093-6639</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Halverson</surname>
            <given-names>Paul K</given-names>
          </name>
          <degrees>DrPH, FACHE</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7684-4363</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Morea</surname>
            <given-names>Justin</given-names>
          </name>
          <degrees>MS, MBA, DO</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3884-8317</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Menachemi</surname>
            <given-names>Nir</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3411-2700</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Vest</surname>
            <given-names>Joshua R</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7226-9688</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Center for Biomedical Informatics</institution>
        <institution>Regenstrief Institute</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Medicine</institution>
        <institution>Indiana University</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Richard M Fairbanks School of Public Health</institution>
        <institution>Indiana University</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Eskenazi Health</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Suranga N Kasthurirathne <email>snkasthu@iu.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>9</day>
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>7</issue>
      <elocation-id>e16129</elocation-id>
      <history>
        <date date-type="received">
          <day>8</day>
          <month>9</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>12</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>3</day>
          <month>2</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>9</day>
          <month>4</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Suranga N Kasthurirathne, Shaun Grannis, Paul K Halverson, Justin Morea, Nir Menachemi, Joshua R Vest. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 09.07.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2020/7/e16129" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Emerging interest in precision health and the increasing availability of patient- and population-level data sets present considerable potential to enable analytical approaches to identify and mitigate the negative effects of social factors on health. These issues are not satisfactorily addressed in typical medical care encounters, and thus, opportunities to improve health outcomes, reduce costs, and improve coordination of care are not realized. Furthermore, methodological expertise on the use of varied patient- and population-level data sets and machine learning to predict need for supplemental services is limited.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The objective of this study was to leverage a comprehensive range of clinical, behavioral, social risk, and social determinants of health factors in order to develop decision models capable of identifying patients in need of various wraparound social services.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used comprehensive patient- and population-level data sets to build decision models capable of predicting need for behavioral health, dietitian, social work, or other social service referrals within a safety-net health system using area under the receiver operating characteristic curve (AUROC), sensitivity, precision, F1 score, and specificity. We also evaluated the value of population-level social determinants of health data sets in improving machine learning performance of the models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Decision models for each wraparound service demonstrated performance measures ranging between 59.2% and 99.3%. These results were statistically superior to the performance measures demonstrated by our previous models which used a limited data set and whose performance measures ranged from 38.2% to 88.3% (behavioral health: F1 score <italic>P</italic>&#60;.001, AUROC <italic>P</italic>=.01; social work: F1 score <italic>P</italic>&#60;.001, AUROC <italic>P</italic>=.03; dietitian: F1 score <italic>P</italic>=.001, AUROC <italic>P</italic>=.001; other: F1 score <italic>P</italic>=.01, AUROC <italic>P</italic>=.02); however, inclusion of additional population-level social determinants of health did not contribute to any performance improvements (behavioral health: F1 score <italic>P</italic>=.08, AUROC <italic>P</italic>=.09; social work: F1 score <italic>P</italic>=.16, AUROC <italic>P</italic>=.09; dietitian: F1 score <italic>P</italic>=.08, AUROC <italic>P</italic>=.14; other: F1 score <italic>P</italic>=.33, AUROC <italic>P</italic>=.21) in predicting the need for referral in our population of vulnerable patients seeking care at a safety-net provider.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Precision health–enabled decision models that leverage a wide range of patient- and population-level data sets and advanced machine learning methods are capable of predicting need for various wraparound social services with good performance.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>social determinants of health</kwd>
        <kwd>supervised machine learning</kwd>
        <kwd>delivery of health care</kwd>
        <kwd>integrated</kwd>
        <kwd>wraparound social services</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The combination of precision health [<xref ref-type="bibr" rid="ref1">1</xref>] and population health initiatives in the United States has raised awareness about how clinical, behavioral, social risk, and social determinants of health factors influence an individual’s use of medical services and their overall health and well-being [<xref ref-type="bibr" rid="ref2">2</xref>]. Large-scale adoption of health information systems [<xref ref-type="bibr" rid="ref3">3</xref>], increased use of interoperable health information exchange, and the availability of socioeconomic data sets have led to unprecedented and ever increasing accessibility to various patient- and population-level data sources. The availability of these data sets, together with a focus on mitigating patient social factors and uptake of machine learning solutions for health care, presents considerable potential for predictive modeling in support of risk prediction and intervention allocation [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. This is particularly significant for wraparound services that can enhance primary care by utilizing providers who are trained in behavioral health, social work, nutritional counseling, patient navigation, health education, and medical legal partnerships in order to mitigate the effects of social risk and to address social needs [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>Wraparound services focus on the socioeconomic, behavioral, and financial factors that typical medical care encounters cannot address satisfactorily [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], and when used, can result in improved health care outcomes, reduced costs [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], and better coordination of care. As such, these services are of significant importance to health care organizations that are incentivized by United States reimbursement policies to mitigate the effects of social issues that influence poor health outcomes and unnecessary utilization of costly services [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
      </sec>
      <sec>
        <title>Previous Work</title>
        <p>In a previous study [<xref ref-type="bibr" rid="ref11">11</xref>], we integrated patient-level clinical, demographic, and visit data with population-level social determinants of health measures to develop decision models that predicted patient need for behavioral health, dietitian, social work, or other wraparound service referrals. We also compared the performance of models built with and without population-level social determinants of health indicators. These models achieved reasonable performance with area under the receiver operating characteristic curve values between 70% and 78%, and sensitivity, specificity, and accuracy values ranging between 50% and 77%. We integrated these models into nine federally qualified health center sites operated by Eskenazi Health, a county-owned safety-net provider located in Indianapolis, Indiana. A subsequent trial identified increased rates of referral when predicted-need scores were shared with primary care end users [<xref ref-type="bibr" rid="ref12">12</xref>]. Nevertheless, there were several limitations in our previous study such as limited patient-level measures, a level of aggregated data that was too coarse, poor optimization, lack of consideration of data temporality, and limited generalizability.</p>
        <p>Our previous models included a wide range of patient-level clinical, behavioral, and encounter-based data elements as well as population-level social determinants of health measures; however, the models might have performed better with the inclusion of additional data elements such as medication data, insurance information, narcotics or substance abuse data, mental and behavioral disorders information inferred from diagnostic data, and patient-level social risk factors extracted from diagnostic data using ICD-10 classification codes [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
        <p>Our previous use of population-level social determinants of health factors measured at the zip-code level did not contribute to any statistically significant performance improvements. A wider range of measures of social determinants of health captured at smaller geographic areas might have yielded more discriminative power and have led to significant performance improvements.</p>
        <p>We used Youden J-index [<xref ref-type="bibr" rid="ref14">14</xref>] which optimizes sensitivity and specificity to determine optimal cutoff thresholds; however, this resulted in poor precision (positive predictive values that ranged between 15% and 50%). Given the importance of optimizing precision, which represents a model’s ability to return only relevant instances, alternate optimization techniques should be used.</p>
        <p>Our previous models included all data captured during the period under study, and not exclusively data elements that occurred prior to the outcome of interest. Failing to omit data elements that occurred after the outcomes of interest may have influenced the performance of these decision models [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>We developed our previous approach using data that was extracted from a homegrown electronic health record system [<xref ref-type="bibr" rid="ref16">16</xref>]. This limited its ability to be replicated across other settings that could support other widely used commercial electronic health record systems. Since our previous study, Eskenazi Health has transitioned to a commercial electronic health record system enabling us to adapt our solution to be vendor neutral and applicable to any electronic health record system.</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>This study addressed the aforementioned limitations by using additional patient- and population-level data elements as well as more advanced analytical methods to develop decision models to identify patients in need of referral to providers that can address social factors. We evaluated the contribution of these enhancements by recreating the original models that had been developed during the previous study (phase 1) and comparing their performance to that of new models developed during this study (phase 2). Furthermore, during each phase, we evaluated the contribution of small-area population-level social determinants of health measures to improving model performance.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Patient Sample</title>
        <p>We included adults (18 years of age or older) with at least one outpatient visit at Eskenazi Health between October 1, 2016 and May 1, 2018.</p>
      </sec>
      <sec>
        <title>Data Extraction</title>
        <p>Primary data sources for the patient cohort were Eskenazi Health’s Epic electronic health record system and the statewide health information exchange data repository known as the Indiana Network for Patient Care [<xref ref-type="bibr" rid="ref17">17</xref>], which provided out-of-network encounter data from hospitals, laboratory systems, long-term care facilities, and federally qualified health centers across the state. These data were supplemented with population-level social determinants of health measures derived from the US Census Bureau, the Marion County Public Health Department vital statistics system, and various community health surveys.</p>
      </sec>
      <sec>
        <title>Feature Extraction</title>
        <p>To recreate the models developed during phase 1, we extracted a subset of features that had been used to train the original models [<xref ref-type="bibr" rid="ref11">11</xref>]. We also extracted additional features for phase 2 enhancements. <xref ref-type="table" rid="table1">Table 1</xref> presents an outline of the feature sets for each phase of model development.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Comparison of the patient- and population-level data sets that were used for each phase.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="350"/>
            <col width="350"/>
            <thead>
              <tr valign="top">
                <td>Feature type</td>
                <td>Phase 1</td>
                <td>Added in phase 2</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Demographics</td>
                <td>Age, ethnicity, and gender</td>
                <td>Insurance (Medicare, Medicaid, self-pay)</td>
              </tr>
              <tr valign="top">
                <td>Weight and nutrition</td>
                <td>None</td>
                <td>BMI, hemoglobin A<sub>1c</sub></td>
              </tr>
              <tr valign="top">
                <td>Encounter frequency</td>
                <td>Outpatient visits, emergency department encounters, and inpatient admissions</td>
                <td>None</td>
              </tr>
              <tr valign="top">
                <td>Chronic conditions</td>
                <td>20 most common chronic conditions [<xref ref-type="bibr" rid="ref18">18</xref>]</td>
                <td>None</td>
              </tr>
              <tr valign="top">
                <td>Addictions and narcotics use</td>
                <td>Tobacco and opioid use</td>
                <td>Alcohol abuse, opioid overdose, use disorders</td>
              </tr>
              <tr valign="top">
                <td>Medications</td>
                <td>None</td>
                <td>145 categories of medication (categorized by therapeutic and pharmaceutical codes) [<xref ref-type="bibr" rid="ref19">19</xref>]</td>
              </tr>
              <tr valign="top">
                <td>Patient-level social risk</td>
                <td>None</td>
                <td>12 patient-level measures [<xref ref-type="bibr" rid="ref20">20</xref>]</td>
              </tr>
              <tr valign="top">
                <td>Population-level social determinants of health</td>
                <td>48 social determinants of health measures [<xref ref-type="bibr" rid="ref11">11</xref>]</td>
                <td>60 social determinants of health measures [<xref ref-type="bibr" rid="ref20">20</xref>]</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Preparation of the Gold Standard</title>
        <p>We sought to predict the need for referrals to behavioral health services, dietitian counseling, social work services, and all other wraparound services, which included respiratory therapy, financial planning, medical-legal partnership assistance, patient navigation, and pharmacist consultations. We used billing, encounter, and scheduling data extracted from the Indiana Network for Patient Care and Eskenazi Health to identify patients who had been referred to supplementary services between October 1, 2016 and May 1, 2018. We assumed that a patient with a referral had been in need of that referral even if the patient subsequently canceled or failed to keep the appointment.</p>
      </sec>
      <sec>
        <title>Data Vector Preparation</title>
        <p>We prepared two data vectors for each wraparound service for phase 1 modeling—a clinical data vector consisting of only patient-level data elements and a master data vector consisting of both patient- and population-level elements. Next, we created two more data vectors for each wraparound service for phase 2 data—a clinical data vector consisting of only patient-level data elements and a master data vector consisting of both patient- and population-level elements. For each patient, we included only data for events that had occurred at least 24 hours prior to the final outcome of interest. Features included age (discrete by whole years); weight- or nutrition-based measures (categorical); gender (categorical); ethnicity (categorical); encounter frequency (number of each type per patient); and addictions or use of narcotics, chronic conditions, medications, and patient-level social risk (binary indicating presence or absence).</p>
        <p>Population-level social determinants of health measures were categorized into three groups—socioeconomic status, disease prevalence, and other miscellaneous factors (such as data on calls made by those who were seeking public assistance). Measures that were reported from across 1150 census tracts were used to calculate <italic>z</italic> scores (a numerical measurement relating a given value to the mean in a group of values) for each of the three categories. The <italic>z</italic> scores were grouped into clusters using the <italic>k</italic>-means algorithm [<xref ref-type="bibr" rid="ref21">21</xref>] and the elbow method [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        <p>As requested by dietitians who consulted on our efforts, for dietitian referrals, prediction of need was restricted to a subset of patients with specific risk conditions (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Thus, data vectors for dietitian referrals included only patients with one or more of these conditions, which were identified by ICD-10 classification codes.</p>
      </sec>
      <sec>
        <title>Machine Learning Process for Phase 1 Models</title>
        <p>We randomly split each data vector into groups of 80% (training and validation data set) and 20% (test set). We replicated the same processes that were used during phase 1 [<xref ref-type="bibr" rid="ref11">11</xref>] to recreate a new set of models to be used for comparison.</p>
      </sec>
      <sec>
        <title>Machine Learning Process for Phase 2 Models</title>
        <p>We split each data vector into random groups of 80% (training and validation data set) and 20% (test set). We applied randomized lasso-based [<xref ref-type="bibr" rid="ref23">23</xref>] feature selection to the 80% training and validation data set to identify the most relevant features for each outcome of interest. We used machine learning in Python (version 3.6.1; scikit-learn library, version 0.21.0) [<xref ref-type="bibr" rid="ref24">24</xref>] to build extreme gradient boosting [<xref ref-type="bibr" rid="ref25">25</xref>] classification models to predict the need for referrals. The extreme gradient boosting algorithm is an implementation of gradient boosted decision trees [<xref ref-type="bibr" rid="ref26">26</xref>] designed for speed and performance. It has demonstrated a strong track record of outperforming other decision trees and other classification algorithms in machine learning competitions [<xref ref-type="bibr" rid="ref27">27</xref>]. The extreme gradient boosting algorithm consisted of multiple parameters, each of which could affect model performance. Thus, we decided to perform hyperparameter tuning on the training and validation data set using randomized search and 10-fold cross-validation. Decision model parameters that were modified as part of the hyperparameter tuning process are listed in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. The best performing models, parameterized using hyperparameter tuning, were applied to the test data sets.</p>
      </sec>
      <sec>
        <title>Analysis</title>
        <p>We assessed the performance of each decision model using the test set. For each record in the test set, each decision model produced a binary outcome (referral needed or referral not needed) and a probability score. We used these scores to calculate area under the receiver operating characteristic curve (AUROC), sensitivity, precision, F1 score, and specificity for each model. These measures were calculated using thresholds that optimized sensitivity and precision scores. We also calculated 95% confidence intervals for each measure using bootstrap methods [<xref ref-type="bibr" rid="ref28">28</xref>]. <italic>P</italic> values were calculated using guidelines presented by Altman and Bland [<xref ref-type="bibr" rid="ref29">29</xref>]. <italic>P</italic> values&#60;.05 were deemed statistically significant. For the models trained during each phase, we evaluated the contribution of population-level measures by comparing the performance of models trained using master (with population-level measures) vector models to the performance of clinical (without population-level measures) vector models. Next, we evaluated the value of the additional data sets and analytical methods that were used to train phase 2 models by comparing their performance to that of models trained in phase 1. <xref rid="figure1" ref-type="fig">Figure 1</xref> presents a flowchart that describes the approach.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The complete study approach from data collection and decision-model building to evaluation of results. ROC: receiver operating characteristic; SDOH: social determinants of health; XGBoost: extreme gradient boosting.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e16129_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>Our patient sample consisted of 72,484 adult patients (<xref ref-type="table" rid="table2">Table 2</xref>). Of these patients, 15,867 (21.9%) met the dietitian referral criteria. Similar to that of phase 1, our patient population reflected an adult, urban, low-income primary care safety-net population; patients ranged in age from 18 to 107 years and were predominantly female (47,187/72,484, 65.1%). Referral types, which constituted our gold standard reference, were behavioral health (12,162/72,484, 16.8%), social work (4104/72,484, 5.7%), dietitian counseling (4330/15,867, 27.3%), and other services (17,877/72,484, 24.7%).</p>
      <p>As with our previous effort, use of population-level social determinants of health measures led to only minimal changes in each performance metric across models trained under phases 1 and 2, and these changes were not statistically significant (behavioral health: F1 score <italic>P</italic>=.08, AUROC <italic>P</italic>=.09; social work: F1 score <italic>P</italic>=.16, AUROC <italic>P</italic>=.09; dietitian: F1 score <italic>P</italic>=.08, AUROC <italic>P</italic>=.14; other: F1 score <italic>P</italic>=.33, AUROC <italic>P</italic>=.21). Thus, we evaluated the contribution of the additional data sets, classification algorithms, and analytical approaches leveraged in phase 2 by comparing clinical vector models developed during phase 1 to those developed during phase 2.</p>
      <p><xref ref-type="table" rid="table3">Table 3</xref> presents a comparison of clinical vector model performance for phase 1 and phase 2. Phase 2 models yielded significantly better results than those of phase 1 models across all performance metrics except sensitivity for social work services (phase 1: 67.0%, 95% CI 63.4%-72.2%; phase 2: 72.4%, 95% CI 69.1%-75.6%; <italic>P</italic>=.07). Phase 2 decision models reported performance measures ranging from 59.2% to 99.3% which were statistically superior to performance measures reported by phase 1 models which ranged from 38.2% to 88.3%. For every clinical vector, phase 2 models reported significantly better area under the receiver operating characteristic curve values than those reported for phase 1 models (behavioral health: <italic>P</italic>=.01; social work: <italic>P</italic>=.03; dietitian: <italic>P</italic>=.001; other: <italic>P</italic>=.02). Furthermore, phase 2 precision scores were significantly greater than those reported in phase 1 (behavioral health: <italic>P</italic>&#60;.001; social work: <italic>P</italic>&#60;.001; dietitian: <italic>P</italic>=.02; other: <italic>P</italic>&#60;.001). We also evaluated model fit using logarithmic loss (log loss), which measures the performance of a classification model where prediction input is a probability between 0 and 1, and using lift curves [<xref ref-type="bibr" rid="ref30">30</xref>], which compare a decision model to a random model for the given percentile of top scored predictions. Log loss values were 0.09 (behavioral health), 0.07 (social work), 0.32 (dietitian), and 0.34 (other). Lift scores for each decision model are shown in a figure in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Characteristics of the adult, primary care patient sample whose data were used in phase 2 risk predictive modeling.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="570"/>
          <col width="400"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Demographic characteristics</td>
              <td>Values</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="2">Age (years), mean (SD)</td>
              <td>44.1 (16.6)</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>Gender (N=72,484), n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>Male</td>
              <td>25,297 (34.9)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>Female</td>
              <td>47,187 (65.1)</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>Insurance provider (N=72,484), n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>Medicaid or public insurance</td>
              <td>41,316 (57.0)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>Private</td>
              <td>31,168 (43.0)</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>BMI category (N=72,484), n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>BMI&#60;18.5</td>
              <td>6379 (8.8)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>18.5≤BMI&#60;25</td>
              <td>8698 (12.0)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>25≤BMI&#60;30</td>
              <td>10,148 (14.0)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>BMI≥30</td>
              <td>20,875 (28.8)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>Missing</td>
              <td>26,384 (36.4)</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>Ethnicity (N=72,484), n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>White, non-Hispanic </td>
              <td>18,266 (25.2)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>African American, non-Hispanic </td>
              <td>34,575 (47.7)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>Hispanic</td>
              <td>15,149 (20.9)</td>
            </tr>
            <tr valign="top">
              <td> </td>
              <td>Other</td>
              <td>4494 (6.2)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Comparison of clinical vector model performance for phase 1 and phase 2.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="370"/>
          <col width="0"/>
          <col width="250"/>
          <col width="0"/>
          <col width="250"/>
          <col width="0"/>
          <col width="100"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Clinical vector performance measures</td>
              <td colspan="4">Model performance, % (95% CI)</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="bottom">
              <td>
                <break/>
              </td>
              <td colspan="2"> </td>
              <td colspan="2">Phase 1</td>
              <td colspan="2">Phase 2</td>
              <td><italic>P</italic> value<sup>a</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="2">
                <bold>Behavioral health services</bold>
              </td>
              <td colspan="2"> </td>
              <td colspan="2"> </td>
              <td colspan="2"> </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Sensitivity</td>
              <td colspan="2">70.2 (68.0, 72.5)</td>
              <td colspan="2">86.3 (83.1, 88.9)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Specificity</td>
              <td colspan="2">78.5 (78.0, 78.9)</td>
              <td colspan="2">99.1 (98.5, 99.7)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">F1 score</td>
              <td colspan="2">56.6 (53.6, 58.9)</td>
              <td colspan="2">90.4 (87.4, 93.4)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Precision (positive predictive value)</td>
              <td colspan="2">47.4 (44.2, 49.6)</td>
              <td colspan="2">95.0 (92.0, 98.3)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="bottom">
              <td>
                <break/>
              </td>
              <td colspan="2">AUROC<sup>b</sup></td>
              <td colspan="2">88.3 (87.4, 89.2)</td>
              <td colspan="2">98.0 (97.6, 98.5)</td>
              <td>.01</td>
            </tr>
            <tr valign="top">
              <td colspan="2">
                <bold>Social work services</bold>
              </td>
              <td colspan="2"> </td>
              <td colspan="2"> </td>
              <td colspan="2"> </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Sensitivity</td>
              <td colspan="2">67.0 (63.4, 72.2)</td>
              <td colspan="2">72.4 (69.1, 75.6)</td>
              <td>.07</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Specificity</td>
              <td colspan="2">79.6 (79.1, 79.8)</td>
              <td colspan="2">99.3 (99.2, 99.6)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">F1 score</td>
              <td colspan="2">48.6 (45.0, 52.5)</td>
              <td colspan="2">82.5 (79.7, 85.3)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Precision (positive predictive value)</td>
              <td colspan="2">38.2 (34.8, 41.2)</td>
              <td colspan="2">95.8 (93.8, 97.8)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">AUROC</td>
              <td colspan="2">87.6 (86.1, 89.2)</td>
              <td colspan="2">93.7 (92.5, 95.0)</td>
              <td>.03</td>
            </tr>
            <tr valign="top">
              <td colspan="2">
                <bold>Dietitian counseling services</bold>
              </td>
              <td colspan="2"> </td>
              <td colspan="2"> </td>
              <td colspan="2"> </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Sensitivity</td>
              <td colspan="2">60.7 (56.5, 64.7)</td>
              <td colspan="2">73.6 (70.5, 77.0)</td>
              <td>.02</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Specificity</td>
              <td colspan="2">73.2 (71.9, 74.9)</td>
              <td colspan="2">93.3 (90.8, 94.6)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">F1 score</td>
              <td colspan="2">61.5 (57.3, 66.0)</td>
              <td colspan="2">76.4 (73.3, 80.4)</td>
              <td>.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Precision (positive predictive value)</td>
              <td colspan="2">62.2 (58.1, 67.4)</td>
              <td colspan="2">79.4 (76.4, 84.2)</td>
              <td>.02</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">AUROC</td>
              <td colspan="2">82.5 (81.5, 83.6)</td>
              <td colspan="2">91.5 (90.3, 92.6)</td>
              <td>.001</td>
            </tr>
            <tr valign="top">
              <td colspan="2">
                <bold>Other wraparound services</bold>
              </td>
              <td colspan="2"> </td>
              <td colspan="2"> </td>
              <td colspan="2"> </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Sensitivity</td>
              <td colspan="2">44.5 (42.7, 46.1)</td>
              <td colspan="2">59.2 (56.5, 63.8)</td>
              <td>.002</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Specificity</td>
              <td colspan="2">78.5 (77.5, 79.3)</td>
              <td colspan="2">92.9 (89.7, 96.1)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">F1 score</td>
              <td colspan="2">43.2 (40.0, 45.7)</td>
              <td colspan="2">65.5 (62.9, 67.6)</td>
              <td>.01</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">Precision (positive predictive value)</td>
              <td colspan="2">41.9 (37.7, 45.2)</td>
              <td colspan="2">73.4 (70.5, 77.7)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="2">AUROC</td>
              <td colspan="2">77.2 (76.2, 78.1)</td>
              <td colspan="2">85.3 (84.4, 86.0)</td>
              <td>.02</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup><italic>P</italic> values were calculated using confidence intervals [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
          </fn>
          <fn id="table3fn2">
            <p><sup>b</sup>AUROC: area under the receiver operating characteristic curve.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our study expanded upon our previous efforts to demonstrate the feasibility of predicting the need for wraparound services such as behavioral health, dietitian, social work and other services using a range of readily available patient- and population-level data sets that represent an individual’s well-being as well as their socioeconomic environment. Specifically, we demonstrated that inclusion of additional patient-level data sets that represented medication history, addiction and mental disorders, and patient-level social risk factors, as well as use of the extreme gradient boosting classification algorithm and advanced analytical methods for model development led to statistically superior performance measures. Furthermore, improved precision scores were made possible by additional data elements and alternate optimization techniques that maximized precision and recall scores and which greatly improved the practical application of our solution. Each decision model reported area under the receiver operating characteristic curve scores from 85% to 98%, which are superior to the global performance of prediction models on mortality [<xref ref-type="bibr" rid="ref31">31</xref>], hospital readmissions [<xref ref-type="bibr" rid="ref4">4</xref>], and disease development [<xref ref-type="bibr" rid="ref32">32</xref>]; however, inclusion of additional population-level aggregate social determinants of health measures in our low-income population did not contribute significantly toward performance improvements despite the introduction of additional indicators, more granular geographic measurement units (by switching from zip code to census tract level), and vectorization methods that converted these to standardized scores to emphasize variance and create indices.</p>
        <p>The inability of population-level social determinants of health measures to improve model performance may be because our patient population was composed of an urban safety-net group with relatively little variability in socioeconomic, policy, and environmental conditions. Thus, it is possible that machine learning studies using larger, more diverse populations may benefit from the use of population-level data [<xref ref-type="bibr" rid="ref33">33</xref>]. Moreover, the lack of improvement may be related to our choice of prediction outcome. Wraparound service providers work to address the social needs and risk factors of individual patients and not population-level social determinants. Likewise, social determinants of health factors influence social risk [<xref ref-type="bibr" rid="ref34">34</xref>], but these population conditions are not the reason for referral to a wraparound service provider. It is likely that social factors are more relevant to, and observed by, the referring provider. Nevertheless, the continued lack of meaningful contribution to our models prompts questions regarding how to best leverage aggregate social determinants of health measures for decision making. This is an important and unanswered question, as census-based aggregate measures are the most widely available and easily accessible indicators of social determinants of health available to researchers and health organizations [<xref ref-type="bibr" rid="ref35">35</xref>]. In contrast, several patient-level measures of social and behavioral factors were influential in the models. This indicates the need for more widespread use and collection of social factors in clinical settings [<xref ref-type="bibr" rid="ref36">36</xref>]. Electronic health record organizations seeking to identify patients with social risk factors and in need of social services must integrate the collection of social risk data into their workflow [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This work has limitations. Notably, the phase 2 model development approach leveraged the same urban safety-net population that was used to develop phase 1 models. Thus, though the phase 2 models demonstrate superior performance, the results may not be generalizable to other commercially insured or broader populations. In addition, we only leveraged structured data that had been extracted from the Indiana Network for Patient Care or from Eskenazi Health for the machine learning process. These methods may not be utilized at other health care settings that are not part of a large, robust health information exchange. Expanding our approaches to different geographic regions would require standardization of population-level sources as well as infrastructure and interoperability measures to effectively store and exchange such data sets [<xref ref-type="bibr" rid="ref38">38</xref>]. Also, we did not utilize any unstructured data sets for machine learning. This is a significant issue as up to 80% of health data may be collected in an unstructured format [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. Despite these limitations, the considerable performance enhancements demonstrated by these models suggest significant potential to enable access to various social services; however, it must be noted that social determinants of health risk factors are often confounded with one another. Thus, mitigating a social need that arises from several social determinants of health risk factors may not result in any positive improvements to a patient [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>Our next steps include expanding our models to predict additional wraparound services of interest. Furthermore, we believe that there is an acute need to improve the explainability and actionability of machine learning predictions using novel methods such as counterfactual reasoning [<xref ref-type="bibr" rid="ref42">42</xref>]. We perceive that similar predictive models for minors and the services available to these patients would be of significant value for health care decision making. Our inability to utilize unstructured data sets for machine learning is a significant concern. Various natural language processing toolkits can leverage unstructured data sets for machine learning; however, integrating these toolkits into in-production systems is challenging due to infrastructure and maintenance costs. Moreover, searching and indexing the massive quantities of free-text reports that are collected statewide would require additional computational effort, and may significantly increase computation time. We are currently engaged in efforts to utilize the Regenstrief Institute’s nDepth tool [<xref ref-type="bibr" rid="ref43">43</xref>] to evaluate the ability to extract actionable elements in a production setting.</p>
      </sec>
      <sec>
        <title>Integration Into Electronic Health Record Systems</title>
        <p>As noted, this work built upon existing risk prediction efforts. We have integrated the updated decision models into the existing platform for all scheduled and walk-in appointments. Model results are presented to end users using a customized interface within the electronic health record with metadata on which features drove the extreme gradient boosting decision-making process, and with predicted probabilities categorized as low, rising, or high risk [<xref ref-type="bibr" rid="ref12">12</xref>] (<xref rid="figure2" ref-type="fig">Figure 2</xref>). This study’s methodological work sets the foundation for our future evaluations of our intervention’s impact on patient outcomes.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Integration of decision models into hospital workflow. INPC: Indiana Network for Patient Care.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e16129_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study developed decision models that integrate a wide range of individual and population data elements and advanced machine learning methods that are capable of predicting need for various wraparound social services; however, population-level data may not contribute to improvements in predictive performance unless they represent larger, diverse populations.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>List of risk conditions used to identify a subpopulation for predicting dietitian referrals.</p>
        <media xlink:href="medinform_v8i7e16129_app1.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Parameters that were modified as part of the hyperparameter tuning process for phase 2.</p>
        <media xlink:href="medinform_v8i7e16129_app2.docx" xlink:title="DOCX File , 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Lift scores reported by each decision model.</p>
        <media xlink:href="medinform_v8i7e16129_app3.png" xlink:title="PNG File , 96 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ICD-10</term>
          <def>
            <p>International Statistical Classification of Diseases, 10th Revision</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Support for this research was provided by the Robert Wood Johnson Foundation. The views expressed herein do not necessarily reflect the views of the Robert Wood Johnson Foundation. The authors also wish to thank Jennifer Williams (Regenstrief Institute), Amber Blackmon (Indiana University), the Regenstrief data core team, and Eskenazi Health of Indiana for their assistance.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>SNK, SG, PKH, NM, and JRV are cofounders of Uppstroms LLC, a commercial entity established to disseminate the artificial intelligence models discussed in this paper. </p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Varmus</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A new initiative on precision medicine</article-title>
          <source>N Engl J Med</source>
          <year>2015</year>
          <month>02</month>
          <day>26</day>
          <volume>372</volume>
          <issue>9</issue>
          <fpage>793</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25635347"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMp1500523</pub-id>
          <pub-id pub-id-type="medline">25635347</pub-id>
          <pub-id pub-id-type="pmcid">PMC5101938</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martin Sanchez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bellazzi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez-Campos</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Exposome informatics: considerations for the design of future biomedical research information systems</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>386</fpage>
          <lpage>90</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24186958"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001772</pub-id>
          <pub-id pub-id-type="medline">24186958</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001772</pub-id>
          <pub-id pub-id-type="pmcid">PMC3994854</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charles</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Searcy</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Adoption of electronic health record systems among US non-federal acute care hospitals</article-title>
          <source>ONC data brief</source>
          <year>2013</year>
          <volume>9</volume>
          <fpage>2008</fpage>
          <lpage>2012</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthit.gov/sites/default/files/oncdatabrief16.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kansagara</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Englander</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salanitro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kagen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Theobald</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kripalani</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Risk prediction models for hospital readmission: a systematic review</article-title>
          <source>JAMA</source>
          <year>2011</year>
          <month>10</month>
          <day>19</day>
          <volume>306</volume>
          <issue>15</issue>
          <fpage>1688</fpage>
          <lpage>98</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22009101"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2011.1515</pub-id>
          <pub-id pub-id-type="medline">22009101</pub-id>
          <pub-id pub-id-type="pii">306/15/1688</pub-id>
          <pub-id pub-id-type="pmcid">PMC3603349</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vuik</surname>
              <given-names>SI</given-names>
            </name>
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>EK</given-names>
            </name>
            <name name-style="western">
              <surname>Darzi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Patient segmentation analysis offers significant benefits for integrated care and support</article-title>
          <source>Health Aff (Millwood)</source>
          <year>2016</year>
          <month>05</month>
          <day>01</day>
          <volume>35</volume>
          <issue>5</issue>
          <fpage>769</fpage>
          <lpage>75</lpage>
          <pub-id pub-id-type="doi">10.1377/hlthaff.2015.1311</pub-id>
          <pub-id pub-id-type="medline">27140981</pub-id>
          <pub-id pub-id-type="pii">35/5/769</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vest</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Haut</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Halverson</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Menachemi</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Indianapolis provider's use of wraparound services associated with reduced hospitalizations and emergency department visits</article-title>
          <source>Health Aff (Millwood)</source>
          <year>2018</year>
          <month>10</month>
          <volume>37</volume>
          <issue>10</issue>
          <fpage>1555</fpage>
          <lpage>1561</lpage>
          <pub-id pub-id-type="doi">10.1377/hlthaff.2018.0075</pub-id>
          <pub-id pub-id-type="medline">30273041</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Whelihan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Navarro</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Boyle</surname>
              <given-names>KR</given-names>
            </name>
            <collab>SDH Card Study Implementation Team</collab>
          </person-group>
          <article-title>Community health center provider ability to identify, treat and account for the social determinants of health: a card study</article-title>
          <source>BMC Fam Pract</source>
          <year>2016</year>
          <month>08</month>
          <day>27</day>
          <volume>17</volume>
          <fpage>121</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcfampract.biomedcentral.com/articles/10.1186/s12875-016-0526-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12875-016-0526-8</pub-id>
          <pub-id pub-id-type="medline">27567892</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12875-016-0526-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5002327</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fenton</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Health care's blind side: the overlooked connection between social needs and good health</article-title>
          <source>Princeton: Robert Wood Johnson Foundation</source>
          <year>2011</year>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fitzpatrick</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rosella</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Calzavara</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Petch</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Manson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Goel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Wodchis</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Looking beyond income and education: socioeconomic status gradients among future high-cost users of health care</article-title>
          <source>Am J Prev Med</source>
          <year>2015</year>
          <month>08</month>
          <volume>49</volume>
          <issue>2</issue>
          <fpage>161</fpage>
          <lpage>71</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0749-3797(15)00082-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2015.02.018</pub-id>
          <pub-id pub-id-type="medline">25960393</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(15)00082-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Theory vs practice: should primary care practice take on social determinants of health now? yes</article-title>
          <source>Ann Fam Med</source>
          <year>2016</year>
          <month>03</month>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>100</fpage>
          <lpage>1</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.annfammed.org/cgi/pmidlookup?view=long&#38;pmid=26951582"/>
          </comment>
          <pub-id pub-id-type="doi">10.1370/afm.1915</pub-id>
          <pub-id pub-id-type="medline">26951582</pub-id>
          <pub-id pub-id-type="pii">14/2/100</pub-id>
          <pub-id pub-id-type="pmcid">PMC4781510</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kasthurirathne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vest</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Menachemi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Halverson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grannis</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Assessing the capacity of social determinants of health data to augment predictive models identifying patients in need of wraparound social services</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>01</month>
          <day>01</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>47</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx130</pub-id>
          <pub-id pub-id-type="medline">29177457</pub-id>
          <pub-id pub-id-type="pii">4645255</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vest</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Menachemi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Grannis</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrell</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Kasthurirathne</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Halverson</surname>
              <given-names>PK</given-names>
            </name>
          </person-group>
          <article-title>Impact of risk stratification on referrals and uptake of wraparound services that address social determinants: a stepped wedged trial</article-title>
          <source>Am J Prev Med</source>
          <year>2019</year>
          <month>04</month>
          <volume>56</volume>
          <issue>4</issue>
          <fpage>e125</fpage>
          <lpage>e133</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2018.11.009</pub-id>
          <pub-id pub-id-type="medline">30772150</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(18)32433-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <article-title>ICD-10 adds more detail on the social determinants of health</article-title>
          <source>LaBrec P</source>
          <year>2016</year>
          <access-date>2019-02-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.3mhisinsideangle.com/blog-post/icd-10-adds-more-detail-on-the-social-determinants-of-health/">https://www.3mhisinsideangle.com/blog-post/icd-10-adds-more-detail-on-the-social-determinants-of-health/</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="76Qh0qs84"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Youden</surname>
              <given-names>WJ</given-names>
            </name>
          </person-group>
          <article-title>Index for rating diagnostic tests</article-title>
          <source>Cancer</source>
          <year>1950</year>
          <month>01</month>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>32</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1002/1097-0142(1950)3:1&#60;32::aid-cncr2820030106&#62;3.0.co;2-3</pub-id>
          <pub-id pub-id-type="medline">15405679</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Doctor AI: predicting clinical events via recurrent neural networks</article-title>
          <source>arXiv preprint arXiv</source>
          <year>2015</year>
          <fpage>1511.05942</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Morea</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mamlin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Simonaitis</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Takesue</surname>
              <given-names>BY</given-names>
            </name>
            <name name-style="western">
              <surname>Dixon</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Dexter</surname>
              <given-names>PR</given-names>
            </name>
          </person-group>
          <article-title>Regenstrief Institute's Medical Gopher: a next-generation homegrown electronic medical record system</article-title>
          <source>Int J Med Inform</source>
          <year>2014</year>
          <month>03</month>
          <volume>83</volume>
          <issue>3</issue>
          <fpage>170</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2013.11.004</pub-id>
          <pub-id pub-id-type="medline">24373714</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(13)00245-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Overhage</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schadow</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Blevins</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dexter</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Mamlin</surname>
              <given-names>B</given-names>
            </name>
            <collab>INPC Management Committee</collab>
          </person-group>
          <article-title>The Indiana network for patient care: a working local health information infrastructure</article-title>
          <source>Health Aff (Millwood)</source>
          <year>2005</year>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>1214</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1377/hlthaff.24.5.1214</pub-id>
          <pub-id pub-id-type="medline">16162565</pub-id>
          <pub-id pub-id-type="pii">24/5/1214</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Posner</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Parekh</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Koh</surname>
              <given-names>HK</given-names>
            </name>
          </person-group>
          <article-title>Defining and measuring chronic conditions: imperatives for research, policy, program, and practice</article-title>
          <source>Prev Chronic Dis</source>
          <year>2013</year>
          <month>04</month>
          <day>25</day>
          <volume>10</volume>
          <fpage>E66</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/pcd/issues/2013/12_0239.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.5888/pcd10.120239</pub-id>
          <pub-id pub-id-type="medline">23618546</pub-id>
          <pub-id pub-id-type="pii">E66</pub-id>
          <pub-id pub-id-type="pmcid">PMC3652713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kilbourne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Normalized names for clinical drugs: RxNorm at 6 years</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <month>07</month>
          <day>01</day>
          <volume>18</volume>
          <issue>4</issue>
          <fpage>441</fpage>
          <lpage>448</lpage>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000116</pub-id>
          <pub-id pub-id-type="medline">21515544</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>Beyond health care: the role of social determinants in promoting health and health equity</article-title>
          <source>Artiga S, Hinton E</source>
          <year>2018</year>
          <access-date>2020-06-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://files.kff.org/attachment/issue-brief-beyond-health-care">http://files.kff.org/attachment/issue-brief-beyond-health-care</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Data clustering: 50 years beyond K-means</article-title>
          <source>Pattern Recognition Letters</source>
          <year>2010</year>
          <month>06</month>
          <volume>31</volume>
          <issue>8</issue>
          <fpage>651</fpage>
          <lpage>666</lpage>
          <pub-id pub-id-type="doi">10.1016/j.patrec.2009.09.011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kodinariya</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Makwana</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Review on determining number of cluster in K-means clustering</article-title>
          <source>International Journal of Advance Research in Computer Science and Management Studies</source>
          <year>2013</year>
          <volume>1</volume>
          <issue>6</issue>
          <fpage>90</fpage>
          <lpage>5</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Alelyani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>CC</given-names>
            </name>
          </person-group>
          <article-title>Feature selection for classification: a review</article-title>
          <source>Data Classification: Algorithms and Applications</source>
          <year>2014</year>
          <month>07</month>
          <day>25</day>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Chapman and Hall/CRC</publisher-name>
          <fpage>37</fpage>
          <lpage>64</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: Machine learning in Python</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2011</year>
          <month>10</month>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: A Scalable Tree Boosting System</article-title>
          <year>2016</year>
          <month>08</month>
          <day>01</day>
          <conf-name>22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 2016</conf-date>
          <conf-loc>San Francisco</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Z</given-names>
            </name>
            <collab>editors</collab>
          </person-group>
          <article-title>Stochastic gradient boosted distributed decision trees</article-title>
          <year>2009</year>
          <month>12</month>
          <day>1</day>
          <conf-name>18th ACM Conference on Information and Knowledge Management</conf-name>
          <conf-date>2009</conf-date>
          <conf-loc>Hong Kong</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1645953.1646301</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nielsen</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Tree Boosting With XGBoost - Why Does XGBoost Win "Every" Machine Learning Competition?</article-title>
          <source>NTNU Open</source>
          <year>2016</year>
          <publisher-loc>Trondheim</publisher-loc>
          <publisher-name>NTNU</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Calonico</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cattaneo</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Titiunik</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Robust nonparametric confidence intervals for regression-discontinuity designs</article-title>
          <source>Econometrica</source>
          <year>2014</year>
          <month>12</month>
          <day>23</day>
          <volume>82</volume>
          <issue>6</issue>
          <fpage>2295</fpage>
          <lpage>2326</lpage>
          <pub-id pub-id-type="doi">10.3982/ecta11757</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Bland</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>How to obtain the P value from a confidence interval</article-title>
          <source>BMJ</source>
          <year>2011</year>
          <volume>343</volume>
          <fpage>d2304</fpage>
          <pub-id pub-id-type="doi">10.1136/bmj.d2304</pub-id>
          <pub-id pub-id-type="medline">22803193</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vuk</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Curk</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>ROC curve, lift chart and calibration plot</article-title>
          <source>Metodološki zvezki</source>
          <year>2006</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>89</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alba</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Agoritsas</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jankowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Courvoisier</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Walter</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Guyatt</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Risk prediction models for mortality in ambulatory patients with heart failure: a systematic review</article-title>
          <source>Circ Heart Fail</source>
          <year>2013</year>
          <month>09</month>
          <day>01</day>
          <volume>6</volume>
          <issue>5</issue>
          <fpage>881</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1161/CIRCHEARTFAILURE.112.000043</pub-id>
          <pub-id pub-id-type="medline">23888045</pub-id>
          <pub-id pub-id-type="pii">CIRCHEARTFAILURE.112.000043</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Echouffo-Tcheugui</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Batty</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Kivimäki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kengne</surname>
              <given-names>AP</given-names>
            </name>
          </person-group>
          <article-title>Risk models to predict hypertension: a systematic review</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>e67370</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0067370"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0067370</pub-id>
          <pub-id pub-id-type="medline">23861760</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-08187</pub-id>
          <pub-id pub-id-type="pmcid">PMC3702558</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dalton</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Perzynski</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Zidar</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Rothberg</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Coulton</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Milinovich</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Einstadter</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Karichu</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Dawson</surname>
              <given-names>NV</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of cardiovascular risk prediction varies by neighborhood socioeconomic position: a retrospective cohort study</article-title>
          <source>Ann Intern Med</source>
          <year>2017</year>
          <month>10</month>
          <day>03</day>
          <volume>167</volume>
          <issue>7</issue>
          <fpage>456</fpage>
          <lpage>464</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28847012"/>
          </comment>
          <pub-id pub-id-type="doi">10.7326/M16-2543</pub-id>
          <pub-id pub-id-type="medline">28847012</pub-id>
          <pub-id pub-id-type="pii">2652557</pub-id>
          <pub-id pub-id-type="pmcid">PMC6435027</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alderwick</surname>
              <given-names>Hugh</given-names>
            </name>
            <name name-style="western">
              <surname>Gottlieb</surname>
              <given-names>Laura M</given-names>
            </name>
          </person-group>
          <article-title>Meanings and misunderstandings: a social determinants of health lexicon for health care systems</article-title>
          <source>Milbank Q</source>
          <year>2019</year>
          <month>06</month>
          <volume>97</volume>
          <issue>2</issue>
          <fpage>407</fpage>
          <lpage>419</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31069864"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/1468-0009.12390</pub-id>
          <pub-id pub-id-type="medline">31069864</pub-id>
          <pub-id pub-id-type="pmcid">PMC6554506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golembiewski</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Blackmon</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hinrichs</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Vest</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Combining nonclinical determinants of health and clinical data for research and evaluation: rapid review</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2019</year>
          <month>10</month>
          <day>07</day>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>e12846</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2019/4/e12846/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12846</pub-id>
          <pub-id pub-id-type="medline">31593550</pub-id>
          <pub-id pub-id-type="pii">v5i4e12846</pub-id>
          <pub-id pub-id-type="pmcid">PMC6803891</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Institute of Medicine</collab>
          </person-group>
          <source>Capturing social and behavioral domains and measures in electronic health records: phase 2</source>
          <year>2014</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
          <comment>ISBN: 0309312434</comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gold</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bunce</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cowburn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dambrun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dearing</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Middendorf</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mossman</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hollombe</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mahr</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Melgar</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gottlieb</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cottrell</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Adoption of social determinants of health EHR tools by community health centers</article-title>
          <source>Ann Fam Med</source>
          <year>2018</year>
          <month>09</month>
          <volume>16</volume>
          <issue>5</issue>
          <fpage>399</fpage>
          <lpage>407</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.annfammed.org/cgi/pmidlookup?view=long&#38;pmid=30201636"/>
          </comment>
          <pub-id pub-id-type="doi">10.1370/afm.2275</pub-id>
          <pub-id pub-id-type="medline">30201636</pub-id>
          <pub-id pub-id-type="pii">16/5/399</pub-id>
          <pub-id pub-id-type="pmcid">PMC6131002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kasthurirathne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cormer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Devadasan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Biondich</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Development of a FHIR based application programming interface for aggregate-level social determinants of health</article-title>
          <year>2019</year>
          <month>03</month>
          <day>25</day>
          <conf-name>AMIA Informatics Summit</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>San Francisco</conf-loc>
          <pub-id pub-id-type="doi">10.4135/9781529705119</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kung</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Byrd</surname>
              <given-names>TA</given-names>
            </name>
          </person-group>
          <article-title>Big data analytics: understanding its capabilities and potential benefits for healthcare organizations</article-title>
          <source>Technological Forecasting and Social Change</source>
          <year>2018</year>
          <month>01</month>
          <volume>126</volume>
          <fpage>3</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1016/j.techfore.2015.12.019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hubbard</surname>
              <given-names>W N</given-names>
            </name>
            <name name-style="western">
              <surname>Westgate</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shapiro</surname>
              <given-names>L M</given-names>
            </name>
            <name name-style="western">
              <surname>Donaldson</surname>
              <given-names>R M</given-names>
            </name>
          </person-group>
          <article-title>Acquired abnormalities of the tricuspid valve--an ultrasonographic study</article-title>
          <source>Int J Cardiol</source>
          <year>1987</year>
          <month>03</month>
          <volume>14</volume>
          <issue>3</issue>
          <fpage>311</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/0167-5273(87)90201-4</pub-id>
          <pub-id pub-id-type="medline">3549579</pub-id>
          <pub-id pub-id-type="pii">0167-5273(87)90201-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castrucci</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Auerbach</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Meeting individual social needs falls short of addressing social determinants of health</article-title>
          <source>Health Affairs Blog</source>
          <access-date>2019-01-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthaffairs.org/do/10.1377/hblog20190115.234942/full/?utm_campaign=HASU&#38;utm_medium=email&#38;utm_content=Health+Affairs+In+2018%3A+Editor+s+Picks%3B+The+2020+Proposed+Payment+Notice%3B+Persistently+High-Cost+Medicare+Patients&#38;utm_source=Newsletter&#38;">https://www.healthaffairs.org/do/10.1377/hblog20190115.234942/full/?utm_campaign=HASU&#38;utm_medium=email&#38;utm_content=Health+Affairs+In+2018%3A+Editor+s+Picks%3B+The+2020+Proposed+Payment+Notice%3B+Persistently+High-Cost+Medicare+Patients&#38;utm_source=Newsletter&#38;</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pearl</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Theoretical impediments to machine learning with seven sparks from the causal revolution</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <comment>arXiv:1801.04016</comment>
          <pub-id pub-id-type="doi">10.1145/3159652.3176182</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Regenstrief Institute Inc.</collab>
          </person-group>
          <article-title>nDepth</article-title>
          <source>regenstrief.org</source>
          <year>2019</year>
          <access-date>2019-01-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.regenstrief.org/implementation/ndepth/">https://www.regenstrief.org/implementation/ndepth/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
