<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i11e22689</article-id>
      <article-id pub-id-type="pmid">33164906</article-id>
      <article-id pub-id-type="doi">10.2196/22689</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Developing a Predictive Model for Asthma-Related Hospital Encounters in Patients With Asthma in a Large, Integrated Health Care System: Secondary Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Agresta</surname>
            <given-names>Thomas</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Fuller</surname>
            <given-names>Candace</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Luo</surname>
            <given-names>Gang</given-names>
          </name>
          <degrees>DPhil</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics and Medical Education</institution>
            <institution>University of Washington</institution>
            <addr-line>UW Medicine South Lake Union, 850 Republican Street</addr-line>
            <addr-line>Building C, Box 358047</addr-line>
            <addr-line>Seattle, WA, 98195</addr-line>
            <country>United States</country>
            <phone>1 206 221 4596</phone>
            <fax>1 206 221 2671</fax>
            <email>gangluo@cs.wisc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7217-4008</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Nau</surname>
            <given-names>Claudia L</given-names>
          </name>
          <degrees>DPhil</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0373-2560</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Crawford</surname>
            <given-names>William W</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8447-7532</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Schatz</surname>
            <given-names>Michael</given-names>
          </name>
          <degrees>MSc, MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7640-5560</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Zeiger</surname>
            <given-names>Robert S</given-names>
          </name>
          <degrees>MD, DPhil</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5788-5063</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Rozema</surname>
            <given-names>Emily</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6422-4482</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Koebnick</surname>
            <given-names>Corinna</given-names>
          </name>
          <degrees>DPhil</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8274-0309</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Informatics and Medical Education</institution>
        <institution>University of Washington</institution>
        <addr-line>Seattle, WA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Research &#38; Evaluation</institution>
        <institution>Kaiser Permanente Southern California</institution>
        <addr-line>Pasadena, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Allergy and Immunology</institution>
        <institution>Kaiser Permanente South Bay Medical Center</institution>
        <addr-line>Harbor City, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Allergy</institution>
        <institution>Kaiser Permanente Southern California</institution>
        <addr-line>San Diego, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Gang Luo <email>gangluo@cs.wisc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>9</day>
        <month>11</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>11</issue>
      <elocation-id>e22689</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>6</day>
          <month>9</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>15</day>
          <month>9</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>18</day>
          <month>10</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Gang Luo, Claudia L Nau, William W Crawford, Michael Schatz, Robert S Zeiger, Emily Rozema, Corinna Koebnick. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 09.11.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/11/e22689/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Asthma causes numerous hospital encounters annually, including emergency department visits and hospitalizations. To improve patient outcomes and reduce the number of these encounters, predictive models are widely used to prospectively pinpoint high-risk patients with asthma for preventive care via care management. However, previous models do not have adequate accuracy to achieve this goal well. Adopting the modeling guideline for checking extensive candidate features, we recently constructed a machine learning model on Intermountain Healthcare data to predict asthma-related hospital encounters in patients with asthma. Although this model is more accurate than the previous models, whether our modeling guideline is generalizable to other health care systems remains unknown.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to assess the generalizability of our modeling guideline to Kaiser Permanente Southern California (KPSC).</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The patient cohort included a random sample of 70.00% (397,858/568,369) of patients with asthma who were enrolled in a KPSC health plan for any duration between 2015 and 2018. We produced a machine learning model via a secondary analysis of 987,506 KPSC data instances from 2012 to 2017 and by checking 337 candidate features to project asthma-related hospital encounters in the following 12-month period in patients with asthma.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our model reached an area under the receiver operating characteristic curve of 0.820. When the cutoff point for binary classification was placed at the top 10.00% (20,474/204,744) of patients with asthma having the largest predicted risk, our model achieved an accuracy of 90.08% (184,435/204,744), a sensitivity of 51.90% (2259/4353), and a specificity of 90.91% (182,176/200,391).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our modeling guideline exhibited acceptable generalizability to KPSC and resulted in a model that is more accurate than those formerly built by others. After further enhancement, our model could be used to guide asthma care management.</p>
        </sec>
        <sec sec-type="registered-report">
          <title>International Registered Report Identifier (IRRID)</title>
          <p>RR2-10.2196/resprot.5039</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>asthma</kwd>
        <kwd>forecasting</kwd>
        <kwd>machine learning</kwd>
        <kwd>patient care management</kwd>
        <kwd>risk factors</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>About 8.4% of people in the United States have asthma [<xref ref-type="bibr" rid="ref1">1</xref>], which causes over 3000 deaths, around 500,000 hospitalizations, and over 2 million emergency department (ED) visits each year [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. To improve patient outcomes and cut the number of asthma-related hospital encounters including ED visits and hospitalizations, predictive models are widely used to prospectively pinpoint high-risk patients with asthma for preventive care via care management. This is the case with health care systems such as the University of Washington Medicine, Kaiser Permanente Northern California [<xref ref-type="bibr" rid="ref3">3</xref>], and Intermountain Healthcare, and with other health plans in 9 of 12 metropolitan communities [<xref ref-type="bibr" rid="ref4">4</xref>]. Once a patient is identified as high risk and placed into a care management program, a care manager will call the patient periodically to assess asthma control, adjust asthma medications, and make appointments for needed care or testing. Successful care management can help patients with asthma obtain better outcomes, thereby avoiding up to 40% of their future hospital encounters [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>A care management program has a limited service capacity and usually enrolls ≤3% of patients [<xref ref-type="bibr" rid="ref9">9</xref>] with a given condition, which places a premium on enrolling at-risk patients. Therefore, the accuracy of the adopted predictive model (or lack thereof) puts an upper bound on the effectiveness of the program. Previously, researchers have developed several models for projecting asthma-related hospital encounters in patients with asthma [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. Each of these models would consider only a few features, miss more than half of patients who will have future asthma-related hospital encounters, and incorrectly project future asthma-related hospital encounters for many other patients with asthma [<xref ref-type="bibr" rid="ref23">23</xref>]. These errors lead to suboptimal patient outcomes, including hospital encounters and unnecessary health care costs because of unneeded care management program enrollment. When building machine learning models on nonmedical data, people often follow the modeling guideline of checking extensive candidate features to boost model accuracy [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. Applying this modeling guideline to the medical domain, we recently constructed a machine learning model on Intermountain Healthcare data to project asthma-related hospital encounters in the following 12-month period in patients with asthma [<xref ref-type="bibr" rid="ref23">23</xref>]. Compared with previous models, our model boosts the area under the receiver operating characteristic curve (AUC) by at least 0.049 to 0.859. Although this is encouraging, it remains unknown whether our modeling guideline is generalizable to other health care systems.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>This study aims to assess the generalizability of our modeling guideline to Kaiser Permanente Southern California (KPSC). Similar to our Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>], our KPSC model uses administrative and clinical data to project asthma-related hospital encounters (ED visits and hospitalizations) in patients with asthma. The categorical dependent variable has 2 possible values—whether the patient with asthma will have asthma-related hospital encounters in the following 12-month period or not. This study describes the construction and evaluation of our KPSC model.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>The methods adopted in this study are similar to those used in our previous paper [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      <sec>
        <title>Ethics Approval and Study Design</title>
        <p>In this study, we performed a secondary analysis of computerized administrative and clinical data. This study was approved by the institutional review boards of the University of Washington Medicine and KPSC.</p>
      </sec>
      <sec>
        <title>Patient Population</title>
        <p>As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, our patient cohort was based on patients with asthma who were enrolled in a KPSC health plan for any duration between 2015 and 2018. Owing to internal regulatory processes, the patient cohort was restricted to a random sample of 70.00% (397,858/568,369) of eligible patients. This sample size is the maximum that KPSC allows for sharing its data with an institution outside of Kaiser Permanente for research. As the largest integrated health care system in Southern California with 227 clinics and 15 hospitals, KPSC offers care to approximately 19% of Southern California residents [<xref ref-type="bibr" rid="ref28">28</xref>]. A patient was deemed to have asthma in a particular year if the patient had one or more diagnosis codes of asthma (International Classification of Diseases [ICD], Tenth Revision [ICD-10]: J45.x; ICD, Ninth Revision [ICD-9]: 493.0x, 493.1x, 493.8x, 493.9x) recorded in the encounter billing database in that year [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. The exclusion criterion was that the patient died during that year. If a patient had no diagnosis code of asthma in any subsequent year, the patient was deemed to have no asthma in that subsequent year.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The patient cohort selection process. KPSC: Kaiser Permanente Southern California.</p>
          </caption>
          <graphic xlink:href="medinform_v8i11e22689_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Prediction Target (the Dependent Variable)</title>
        <p>For each patient identified as having asthma in a particular year, the outcome was whether the patient had any asthma-related hospital encounter in the following year. An asthma-related hospital encounter is an ED visit or hospitalization with asthma as the principal diagnosis (ICD-10: J45.x; ICD-9: 493.0x, 493.1x, 493.8x, 493.9x). For every patient with asthma, the patient’s data up to the end of every calendar year were used to project the patient’s outcome in the following year as long as the patient was deemed to have asthma in the previous year and was also enrolled in a KPSC health plan at the end of the previous year.</p>
      </sec>
      <sec>
        <title>Data Set</title>
        <p>For the patients in our patient cohort, we used their entire electronically available patient history at KPSC. At KPSC, various kinds of information on its patients have been recorded in the electronic medical record system since 2010. In addition, we had electronic records of the patients’ diagnosis codes starting from 1981, regardless of whether they were stored in the electronic medical record system. From the research data warehouse at KPSC, we retrieved an administrative and clinical data set, including information regarding our patient cohort’s encounters and medication dispensing at KPSC from 2010 to 2018 and diagnosis codes at KPSC from 1981 to 2018. Owing to regulatory and privacy concerns, the data set is not publicly available.</p>
      </sec>
      <sec>
        <title>Features (Independent Variables)</title>
        <p>We examined 2 types of candidate features—basic and extended. A basic feature and its corresponding extended features differ only in the year of the data used for feature computation. We considered 307 basic candidate features listed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref31">31</xref>]. Covering a wide range of characteristics, these basic candidate features were computed from the structured attributes in our data set. In <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, unless the word <italic>different</italic> shows up, every mention of the number of a given type of item such as medications counts multiplicity. As defined in our previous paper [<xref ref-type="bibr" rid="ref23">23</xref>], major visits for asthma include ED visits and hospitalizations with an asthma diagnosis code and outpatient visits with a primary diagnosis of asthma. Outpatient visits with a secondary but no primary diagnosis of asthma are regarded as minor visits for asthma.</p>
        <p>Every input data instance to the model targets a unique (patient, index year) pair and is employed to forecast the patient’s outcome in the following year. For the (patient, index year) pair, the patient’s primary care provider (PCP), age, and home address were computed as of the end of the index year. The basic candidate features of history of bronchiolitis, the number of years since the first asthma-coded encounter in the data set, premature birth, family history of asthma, and the number of years since the first encounter for chronic obstructive pulmonary disease in the data set were computed using the data from 1981 to the index year. All of the allergy features and the features derived from the problem list were computed using the data from 2010 to the index year. One basic candidate feature was computed using the data in the index and preindex years: the proportion of patients who had asthma-related hospital encounters in the index year out of all of the patients of the patient’s PCP with asthma in the preindex year. The other 277 basic candidate features were computed using the data in the index year.</p>
        <p>In addition to the basic candidate features, we also checked extended candidate features. Our Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>] was built using the extreme gradient boosting (XGBoost) machine learning classification algorithm [<xref ref-type="bibr" rid="ref32">32</xref>]. As detailed in Hastie et al [<xref ref-type="bibr" rid="ref33">33</xref>], XGBoost automatically computes the importance value of every feature as the fractional contribution of the feature to the model. Previously, we showed that ignoring those features with importance values &#60;0.01 led to a little drop in model accuracy [<xref ref-type="bibr" rid="ref23">23</xref>]. Using the basic candidate features and the model construction method described below, we built an initial XGBoost model on KPSC data. As a patient’s demographic features rarely change over time, no extended candidate feature was formed for any of the basic demographic features. For each basic candidate feature that was nondemographic, was computed on the data in the index year, and had an importance value ≥0.01 in the initial XGBoost model, we computed 2 related extended candidate features, one using the data in the preindex year and another using the data in the year that was 2 years before the index year. The only difference between the extended candidate features and the basic feature is the year of the data used for feature computation. For instance, for the basic candidate feature <italic>number of ED visits in 2016</italic>, the 2 related extended candidate features are the number of ED visits in 2015 and the number of ED visits in 2014. In brief, we formed extended candidate features for only those suitable and important basic candidate features. Our intuition is that among all possible ones that could be formed, these extended candidate features are most promising with regard to additional predictive power. For the other basic candidate features with lower importance values, those extended candidate features that could possibly be formed for them tend to have little extra predictive power and can be ignored. Given the finite data instances available for model training, this feature extending approach avoids a large rise in the number of candidate features, which may cause sample size issues. We considered all of the basic and extended candidate features when building our final predictive model.</p>
      </sec>
      <sec>
        <title>Data Analysis</title>
        <sec>
          <title>Data Preparation</title>
          <p>Peak expiratory flow values are available in our KPSC data set but not in the Intermountain Healthcare data set used in our previous paper [<xref ref-type="bibr" rid="ref23">23</xref>]. On the basis of the upper and lower bounds given by a medical expert (MS) in our team, all peak expiratory flow values &#62;700 were regarded as biologically implausible. Using this criterion and the same data preparation method adopted in our previous paper [<xref ref-type="bibr" rid="ref23">23</xref>], we normalized data, identified biologically implausible values, and set them to missing. As the outcomes were from the following year and the extended candidate features were computed using the data from up to 2 years before the index year, our data set contained 6 years of effective data (2012-2017) over a total of 9 years (2010-2018). In clinical practice, a model is trained on historical data and then applied to future years’ data. To mirror this, the 2012 to 2016 data were used as the training set for model training. The 2017 data were employed as the test set to gauge model performance.</p>
        </sec>
        <sec>
          <title>Performance Metrics</title>
          <p>As shown in the formulas below and <xref ref-type="table" rid="table1">Table 1</xref>, we adopted 6 standard metrics to assess model performance: accuracy, specificity, sensitivity, negative predictive value (NPV), positive predictive value (PPV), and AUC.</p>
          <disp-formula>Accuracy=(TP+TN)/(TP+TN+FP+FN),</disp-formula>
          <disp-formula>Specificity=TN/(TN+FP),</disp-formula>
          <disp-formula>Sensitivity=TP/(TP+FN),</disp-formula>
          <disp-formula>Negative predictive value=TN/(TN+FN),</disp-formula>
          <disp-formula>Positive predictive value=TP/(TP+FP).</disp-formula>
          <p>We performed a 1000-fold bootstrap analysis [<xref ref-type="bibr" rid="ref34">34</xref>] to compute the 95% CIs of these performance measures. We plotted the receiver operating characteristic (ROC) curve to show the tradeoff between sensitivity and specificity.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>The error matrix.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="350"/>
              <col width="350"/>
              <col width="300"/>
              <thead>
                <tr valign="top">
                  <td>Outcome class</td>
                  <td>Asthma-related hospital encounters in the following year</td>
                  <td>No asthma-related hospital encounter in the following year</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Projected asthma-related hospital encounters in the following year</td>
                  <td>TP<sup>a</sup></td>
                  <td>FP<sup>b</sup></td>
                </tr>
                <tr valign="top">
                  <td>Projected no asthma-related hospital encounter in the following year</td>
                  <td>FN<sup>c</sup></td>
                  <td>TN<sup>d</sup></td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>TP: true positive.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>FP: false positive.</p>
              </fn>
              <fn id="table1fn3">
                <p><sup>c</sup>FN: false negative.</p>
              </fn>
              <fn id="table1fn4">
                <p><sup>d</sup>TN: true negative.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Classification Algorithms</title>
          <p>We employed Waikato Environment for Knowledge Analysis (WEKA) Version 3.9 [<xref ref-type="bibr" rid="ref35">35</xref>] to build machine learning models. As a major open source toolkit for machine learning and data mining, WEKA integrates many classic feature selection techniques and machine learning algorithms. We examined the 39 native machine learning classification algorithms in WEKA, as shown in the web-based appendix of our previous paper [<xref ref-type="bibr" rid="ref23">23</xref>], and the XGBoost classification algorithm [<xref ref-type="bibr" rid="ref32">32</xref>] realized in the XGBoost4J package [<xref ref-type="bibr" rid="ref36">36</xref>]. As an ensemble of decision trees, XGBoost implements gradient boosting in a scalable and efficient manner. As XGBoost takes only numerical features as its inputs, we converted every categorical feature to one or more binary features through one-hot encoding before giving the feature to XGBoost. We employed our previously developed automatic and efficient machine learning model selection method [<xref ref-type="bibr" rid="ref37">37</xref>] and the 2012 to 2016 training data to automatically choose, among all of the applicable ones, the classification algorithm, feature selection technique, hyperparameter values, and data balancing method for managing imbalanced data. On average, our method runs 28 times faster and achieves an 11% lower model error rate than the Auto-WEKA automatic model selection method [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>].</p>
        </sec>
        <sec>
          <title>Assessing the Generalizability of Our Intermountain Healthcare Model to KPSC</title>
          <p>This study mainly assessed our modeling guideline’s generalizability to KPSC by using the KPSC training set to train several models and assessing their performance on the KPSC test set. In addition, we assessed our Intermountain Healthcare model’s [<xref ref-type="bibr" rid="ref23">23</xref>] generalizability to KPSC. Using the Intermountain Healthcare data set and the top 21 features with an importance value computed by XGBoost ≥0.01, we formerly built a simplified Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>]. The simplified model retained almost all of the predictive power of our full Intermountain Healthcare model. Our KPSC data set included these 21 features but not all of the 142 features used in our full Intermountain Healthcare model. We assessed our simplified Intermountain Healthcare model’s performance on the KPSC test set twice, once after retraining the model on the KPSC training set and once using the model trained on the Intermountain Healthcare data set without retraining the model on the KPSC training set.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Clinical and Demographic Characteristics of the Patient Cohorts</title>
        <p>Every data instance targets a unique (patient, index year) pair. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> displays the clinical and demographic characteristics of our patient cohort during the time periods of 2012 to 2016 and 2017. The set of characteristics during 2012 to 2016 is similar to that during 2017. During 2012 to 2016 and 2017, 2.42% (18,925/782,762) and 2.13% (4353/204,744) of data instances were associated with asthma-related hospital encounters in the following year, respectively.</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows, for each clinical or demographic characteristic, the statistical test results on whether the data instances linking to future asthma-related hospital encounters and those linking to no future asthma-related hospital encounter had the same distribution. These 2 sets of data instances had the same distribution when the <italic>P</italic> value is ≥.05, and distinct distributions when the <italic>P</italic> value is &#60;.05. In <xref ref-type="table" rid="table2">Table 2</xref>, all of the <italic>P</italic> values &#60;.05 are marked in italics.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>For each clinical or demographic characteristic, the statistical test results on whether the data instances linking to future asthma-related hospital encounters and those linking to no future asthma-related hospital encounter had the same distribution.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="510"/>
            <col width="230"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristics</td>
                <td><italic>P</italic> value for the 2012-2016 data</td>
                <td><italic>P</italic> value for the 2017 data</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Age (years)</td>
                <td>
                  <italic> &#60;.001</italic>
                  <sup>a,b</sup>
                </td>
                <td>
                  <italic>   &#60;.001</italic>
                  <sup>a</sup>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Gender</td>
                <td>
                  <italic>   &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>    .01</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Race</td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Ethnicity</td>
                <td>
                  <italic>    &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic> &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Insurance category</td>
                <td>
                  <italic>    &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>   &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Number of years since the first asthma-coded encounter in the data set</td>
                <td>.78<sup>a</sup></td>
                <td>
                  <italic>.006</italic>
                  <sup>a</sup>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Asthma medication fill</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inhaled corticosteroid</td>
                <td>
                  <italic>   &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inhaled corticosteroid and long-acting beta-2 agonist combination</td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Leukotriene modifier</td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Long-acting beta-2 agonist</td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mast cell stabilizer</td>
                <td>&#62;.99<sup>c</sup></td>
                <td>&#62;.99<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Short-acting, inhaled beta-2 agonist</td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>  &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Systemic corticosteroid</td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Comorbidity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Allergic rhinitis</td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Anxiety or depression</td>
                <td>
                  <italic>  &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bronchopulmonary dysplasia</td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>&#62;.99<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Chronic obstructive pulmonary disease</td>
                <td>
                  <italic>  &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cystic fibrosis</td>
                <td>&#62;.99<sup>c</sup></td>
                <td>.52<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Eczema</td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>&#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gastroesophageal reflux</td>
                <td>
                  <italic>  &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>   &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Obesity</td>
                <td>
                  <italic>   &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>   &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Premature birth</td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>    &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sinusitis</td>
                <td>.33<sup>c</sup></td>
                <td>.06<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sleep apnea</td>
                <td>
                  <italic> .003</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>     &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Smoking status</td>
                <td>
                  <italic>  &#60;.001</italic>
                  <sup>c</sup>
                </td>
                <td>
                  <italic>   &#60;.001</italic>
                  <sup>c</sup>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup><italic>P</italic> values obtained by performing the Cochran-Armitage trend test [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup><italic>P</italic> values &#60;.05 marked in italics.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup><italic>P</italic> values obtained by performing the chi-square two-sample test.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Classification Algorithm and Features Used</title>
        <p>Before building our final model, the importance values of the basic candidate features were computed once on our initial XGBoost model. This led us to examine 30 extended candidate features in addition to the 307 basic candidate features. With these 337 basic and extended candidate features as inputs, our automatic model selection method [<xref ref-type="bibr" rid="ref37">37</xref>] picked the XGBoost classification algorithm [<xref ref-type="bibr" rid="ref32">32</xref>]. As an ensemble of decision trees, XGBoost can handle missing feature values naturally. Our final predictive model was built using XGBoost and the 221 features shown in descending order of importance value in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The other features had no additional predictive power and were automatically dropped by XGBoost.</p>
      </sec>
      <sec>
        <title>Performance Measures of the Final KPSC Model</title>
        <p>On the KPSC test set, our final model achieved an AUC of 0.820 (95% CI 0.813-0.826). <xref rid="figure2" ref-type="fig">Figure 2</xref> displays the ROC curve of our final model. <xref ref-type="table" rid="table3">Table 3</xref> displays the performance measures of our final model when various top percentages of patients having the largest predicted risk were adopted as the cutoff point for performing binary classification. When this percentage was at 10.00% (20,474/204,744), our final model achieved an accuracy of 90.08% (184,435/204,744; 95% CI 89.95-90.21), a sensitivity of 51.90% (2259/4353; 95% CI 50.44-53.42), a specificity of 90.91% (182,176/200,391; 95% CI 90.78-91.03), a PPV of 11.03% (2259/20,474; 95% CI 10.59-11.46), and an NPV of 98.86% (182,176/184,270; 95% CI 98.81-98.91). <xref ref-type="table" rid="table4">Table 4</xref> gives the corresponding error matrix of our final model.</p>
        <p>When we excluded the extended candidate features and considered only the basic candidate features, the AUC of our model dropped to 0.809. Several basic candidate features, such as the number of years since the first asthma-coded encounter in the data set, needed more than 1 year of past data to calculate. When we further excluded these multiyear candidate features and considered only those basic candidate features calculated on 1 year of past data, the model’s AUC dropped to 0.807.</p>
        <p>Without precluding any feature from being considered, the model trained on data from both children (aged &#60;18 years) with asthma and adults (aged ≥18 years) with asthma gained an AUC of 0.815 in children with asthma and an AUC of 0.817 in adults with asthma. In comparison, the model trained only on data from children with asthma gained an AUC of 0.811 in children with asthma. The model trained only on data from adults with asthma gained an AUC of 0.818 in adults with asthma.</p>
        <p>If we adopted only the top 25 features shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> with an importance value ≥0.01 and removed the other 312 features, the model’s AUC dropped from 0.820 to 0.800 (95% CI 0.793-0.808). When the top 10.00% (20,474/204,744) of patients having the largest predicted risk were adopted as the cutoff point for doing binary classification, the model’s accuracy dropped from 90.08% (184,435/204,744) to 89.96% (184,185/204,744; 95% CI 89.83-90.08), sensitivity dropped from 51.90% (2259/4353) to 49.02% (2134/4353; 95% CI 47.71-50.55), specificity dropped from 90.91% (182,176/200,391) to 90.85% (182,051/200,391; 95% CI 90.72-90.97), PPV dropped from 11.03% (2259/20,474) to 10.42% (2134/20,474; 95% CI 10.03-10.86), and NPV dropped from 98.86% (182,176/184,270) to 98.80% (182,051/184,270; 95% CI 98.75-98.85).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The receiver operating characteristic curve of our final predictive model.</p>
          </caption>
          <graphic xlink:href="medinform_v8i11e22689_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>The performance measures of our final predictive model when various top percentages of patients having the largest predicted risk were adopted as the cutoff point for doing binary classification.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="100"/>
            <col width="100"/>
            <col width="0"/>
            <col width="100"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Top percentage of patients having the largest predicted risk (%)</td>
                <td>Accuracy (N=204,744), n (%)</td>
                <td>Sensitivity (N=4353), n (%)</td>
                <td>Specificity (N=200,391), n (%)</td>
                <td colspan="3">PPV<sup>a</sup></td>
                <td colspan="2">NPV<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>n (%)</td>
                <td>N</td>
                <td colspan="2">n (%)</td>
                <td>N</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>199,732 (97.55)</td>
                <td>694 (15.94)</td>
                <td>199,038 (99.32)</td>
                <td>694 (33.90)</td>
                <td>2047</td>
                <td colspan="2">199,038 (98.19)</td>
                <td>202,697</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>198,349 (96.88)</td>
                <td>1026 (23.57)</td>
                <td>197,323 (98.47)</td>
                <td>1026 (25.06)</td>
                <td>4094</td>
                <td colspan="2">197,323 (98.34)</td>
                <td>200,650</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>196,831 (96.14)</td>
                <td>1291 (29.66)</td>
                <td>195,540 (97.58)</td>
                <td>1291 (21.02)</td>
                <td>6142</td>
                <td colspan="2">195,540 (98.46)</td>
                <td>198,602</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>195,186 (95.33)</td>
                <td>1492 (34.28)</td>
                <td>193,694 (96.66)</td>
                <td>1492 (18.22)</td>
                <td>8189</td>
                <td colspan="2">193,694 (98.54)</td>
                <td>196,555</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>193,472 (94.49)</td>
                <td>1659 (38.11)</td>
                <td>191,813 (95.72)</td>
                <td>1659 (16.21)</td>
                <td>10,237</td>
                <td colspan="2">191,813 (98.62)</td>
                <td>194,507</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>191,717 (93.64)</td>
                <td>1805 (41.47)</td>
                <td>189,912 (94.77)</td>
                <td>1805 (14.69)</td>
                <td>12,284</td>
                <td colspan="2">189,912 (98.68)</td>
                <td>192,460</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>189,919 (92.76)</td>
                <td>1930 (44.34)</td>
                <td>187,989 (93.81)</td>
                <td>1930 (13.47)</td>
                <td>14,332</td>
                <td colspan="2">187,989 (98.73)</td>
                <td>190,412</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>188,124 (91.88)</td>
                <td>2056 (47.23)</td>
                <td>186,068 (92.85)</td>
                <td>2056 (12.55)</td>
                <td>16,379</td>
                <td colspan="2">186,068 (98.78)</td>
                <td>188,365</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>186,267 (90.98)</td>
                <td>2151 (49.41)</td>
                <td>184,116 (91.88)</td>
                <td>2151 (11.67)</td>
                <td>18,426</td>
                <td colspan="2">184,116 (98.82)</td>
                <td>186,318</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>184,435 (90.08)</td>
                <td>2259 (51.90)</td>
                <td>182,176 (90.91)</td>
                <td>2259 (11.03)</td>
                <td>20,474</td>
                <td colspan="2">182,176 (98.86)</td>
                <td>184,270</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>174,902 (85.42)</td>
                <td>2611 (59.98)</td>
                <td>172,291 (85.98)</td>
                <td>2611 (8.50)</td>
                <td>30,711</td>
                <td colspan="2">172,291 (99.00)</td>
                <td>174,033</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>165,253 (80.71)</td>
                <td>2905 (66.74)</td>
                <td>162,348 (81.02)</td>
                <td>2905 (7.09)</td>
                <td>40,948</td>
                <td colspan="2">162,348 (99.12)</td>
                <td>163,796</td>
              </tr>
              <tr valign="top">
                <td>25</td>
                <td>155,491 (75.94)</td>
                <td>3143 (72.20)</td>
                <td>152,348 (76.03)</td>
                <td>3143 (6.14)</td>
                <td>51,186</td>
                <td colspan="2">152,348 (99.21)</td>
                <td>153,558</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>PPV: positive predictive value.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>NPV: negative predictive value.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The error matrix of our final predictive model when the top 10.00% (20,474/204,744) of patients having the largest predicted risk were adopted as the cutoff point for doing binary classification.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="350"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Outcome class</td>
                <td>Asthma-related hospital encounters in the following year</td>
                <td>No asthma-related hospital encounter in the following year</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Projected asthma-related hospital encounters in the following year</td>
                <td>2259</td>
                <td>18,215</td>
              </tr>
              <tr valign="top">
                <td>Projected no asthma-related hospital encounter in the following year</td>
                <td>2094</td>
                <td>182,176</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Performance Measures of the Simplified Intermountain Healthcare Model</title>
        <p>When applying our simplified Intermountain Healthcare model trained on the Intermountain Healthcare data set [<xref ref-type="bibr" rid="ref23">23</xref>] to the KPSC test set without retraining the model on the KPSC training set, the model gained an AUC of 0.751 (95% CI 0.742-0.759). When the top 10.00% (20,474/204,744) of patients having the largest predicted risk were adopted as the cutoff point for doing binary classification, the model achieved an accuracy of 89.64% (183,531/204,744; 95% CI 89.51-89.77), a sensitivity of 41.51% (1807/4353; 95% CI 40.14-42.97), a specificity of 90.68% (181,724/200,391; 95% CI 90.55-90.81), a PPV of 8.83% (1807/20,474; 95% CI 8.44-9.23), and an NPV of 98.62% (181,724/184,270; 95% CI 98.57-98.67).</p>
        <p>After using the KPSC training set to retrain our simplified Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>], the model gained an AUC of 0.779 (95% CI 0.772-0.787) on the KPSC test set. When the top 10.00% (20,474/204,744) of patients having the largest predicted risk were adopted as the cutoff point for doing binary classification, the model achieved an accuracy of 89.85% (183,953/204,744; 95% CI 89.71-89.97), a sensitivity of 46.36% (2018/4353; 95% CI 44.89-47.84), a specificity of 90.79% (181,935/200,391; 95% CI 90.65-90.91), a PPV of 9.86% (2018/20,474; 95% CI 9.45-10.25), and an NPV of 98.73% (181,935/184,270; 95% CI 98.68-98.78).</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We used KPSC data to develop a model to forecast asthma-related hospital encounters in the following 12-month period in patients with asthma. <xref ref-type="table" rid="table5">Table 5</xref> shows that, compared with the models formerly built by others [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref22">22</xref>], our final KPSC model gained a higher AUC; that is, our modeling guideline of checking extensive candidate features to boost model accuracy exhibited acceptable generalizability to KPSC. After further enhancement to automatically explain its predictions [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>] and to raise its accuracy, our model could be used to direct asthma care management to help improve patient outcomes and reduce health care costs.</p>
        <p>Asthma affects adults and children differently. Our final model gained a lower AUC in children than in adults. Additional work is required to understand the difference and to boost the prediction accuracy in children.</p>
        <p>We examined 337 basic and extended candidate features. Approximately 65.6% (221/337) of these were used in our final model. Many of the unused features were correlated with the outcome variable but provided no additional predictive power on the KPSC data set beyond those used in our final model.</p>
        <p>In <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, the 8 most important features and several others within the top 25 features reflect the loss of asthma control. This loss of asthma control could be because of the severity of the patient’s asthma. It could also relate to management practices, treatment nonadherence, or socioeconomic factors for which we had no data.</p>
        <p>When using our simplified Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>] without retraining it on the KPSC training set, the model achieved an AUC of 0.751 on the KPSC test set. Despite being 0.069 lower than our final KPSC model’s AUC, this AUC is higher than the AUCs of many previous models for predicting hospitalization and ED visits in patients with asthma (<xref ref-type="table" rid="table5">Table 5</xref>). Therefore, we regard our simplified Intermountain Healthcare model as having acceptable generalizability to KPSC.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Our final Kaiser Permanente Southern California model in comparison with several previous models for forecasting hospitalizations and emergency department visits in patients with asthma.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="110"/>
            <col width="80"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="80"/>
            <col width="90"/>
            <col width="90"/>
            <col width="70"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Prediction target</td>
                <td>Number of features the model used</td>
                <td>Number of data instances</td>
                <td>Classification algorithm</td>
                <td>The undesirable outcome’s prevalence rate in the whole data set (%)</td>
                <td>AUC<sup>a</sup></td>
                <td>Sensitivity (%)</td>
                <td>Specificity (%)</td>
                <td>PPV<sup>b</sup> (%)</td>
                <td>NPV<sup>c</sup> (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Our final KPSC<sup>d</sup> model</td>
                <td>Asthma-related hospital encounters</td>
                <td>221</td>
                <td>987,506</td>
                <td>XGBoost<sup>e</sup></td>
                <td>23,278 (2.36)</td>
                <td>0.820</td>
                <td>2259 (51.90)</td>
                <td>182,176 (90.91)</td>
                <td>2259 (11.03)</td>
                <td>182,176 (98.86)</td>
              </tr>
              <tr valign="top">
                <td>Our Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>]</td>
                <td>Asthma-related hospital encounters</td>
                <td>142</td>
                <td>334,564</td>
                <td>XGBoost</td>
                <td>12,144 (3.63)</td>
                <td>0.859</td>
                <td>436 (53.69)</td>
                <td>16,955 (91.93)</td>
                <td>436 (22.65)</td>
                <td>16,955 (97.83)</td>
              </tr>
              <tr valign="top">
                <td>Miller et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td>
                <td>Asthma-related hospital encounters</td>
                <td>17</td>
                <td>2821</td>
                <td>Logistic regression</td>
                <td>8.5</td>
                <td>0.81</td>
                <td>—<sup>f</sup></td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Loymans et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td>
                <td>Asthma exacerbation</td>
                <td>7</td>
                <td>611</td>
                <td>Logistic regression</td>
                <td>13</td>
                <td>0.8</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Lieu et al [<xref ref-type="bibr" rid="ref3">3</xref>]</td>
                <td>Asthma-related hospitalization</td>
                <td>7</td>
                <td>16,520</td>
                <td>Proportional hazards regression</td>
                <td>1.8</td>
                <td>0.79</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Schatz et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td>
                <td>Asthma-related hospitalization in children</td>
                <td>5</td>
                <td>4197</td>
                <td>Logistic regression</td>
                <td>1.4</td>
                <td>0.781</td>
                <td>43.9</td>
                <td>89.8</td>
                <td>5.6</td>
                <td>99.1</td>
              </tr>
              <tr valign="top">
                <td>Yurk et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td>
                <td>Lost day or asthma-related hospital encounters</td>
                <td>11</td>
                <td>4888</td>
                <td>Logistic regression</td>
                <td>54</td>
                <td>0.78</td>
                <td>77</td>
                <td>63</td>
                <td>82</td>
                <td>56</td>
              </tr>
              <tr valign="top">
                <td>Eisner et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td>
                <td>Asthma-related ED<sup>g</sup> visit</td>
                <td>3</td>
                <td>2415</td>
                <td>Logistic regression</td>
                <td>18.3</td>
                <td>0.751</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Forno et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td>
                <td>Severe asthma exacerbation</td>
                <td>17</td>
                <td>615</td>
                <td>Scoring</td>
                <td>69.6</td>
                <td>0.75</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Schatz et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td>
                <td>Asthma-related hospitalization in adults</td>
                <td>3</td>
                <td>6904</td>
                <td>Logistic regression</td>
                <td>1.2</td>
                <td>0.712</td>
                <td>44.9</td>
                <td>87.0</td>
                <td>3.9</td>
                <td>99.3</td>
              </tr>
              <tr valign="top">
                <td>Lieu et al [<xref ref-type="bibr" rid="ref3">3</xref>]</td>
                <td>Asthma-related ED visit</td>
                <td>7</td>
                <td>16,520</td>
                <td>Proportional hazards regression</td>
                <td>6.4</td>
                <td>0.69</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Eisner et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td>
                <td>Asthma-related hospitalization</td>
                <td>1</td>
                <td>2858</td>
                <td>Logistic regression</td>
                <td>32.8</td>
                <td>0.689</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Sato et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td>
                <td>Severe asthma exacerbation</td>
                <td>3</td>
                <td>78</td>
                <td>Classification and regression tree</td>
                <td>21</td>
                <td>0.625</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Schatz et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td>
                <td>Asthma-related hospital encounters</td>
                <td>4</td>
                <td>14,893</td>
                <td>Logistic regression</td>
                <td>6.5</td>
                <td>0.614</td>
                <td>25.4</td>
                <td>92.0</td>
                <td>22.0</td>
                <td>93.2</td>
              </tr>
              <tr valign="top">
                <td>Lieu et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td>
                <td>Asthma-related hospital encounters</td>
                <td>4</td>
                <td>7141</td>
                <td>Classification and regression tree</td>
                <td>6.9</td>
                <td>—</td>
                <td>49.0</td>
                <td>83.6</td>
                <td>18.5</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>PPV: positive predictive value.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>NPV: negative predictive value.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>KPSC: Kaiser Permanente Southern California.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup>XGBoost: extreme gradient boosting.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup>The original paper presenting the model did not report the performance measure.</p>
            </fn>
            <fn id="table5fn7">
              <p><sup>g</sup>ED: emergency department.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Comparison With Previous Work</title>
        <p>Multiple researchers have built models to forecast ED visits and hospitalizations in patients with asthma [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. <xref ref-type="table" rid="table5">Table 5</xref> compares our final KPSC model with those models, which encompass all pertinent models covered in the systematic review of Loymans et al [<xref ref-type="bibr" rid="ref18">18</xref>]. With the exception of our Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>], every model formerly built by others [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref22">22</xref>] gained a lower AUC than our final KPSC model. Instead of being for all patients with asthma, the model by Miller et al [<xref ref-type="bibr" rid="ref15">15</xref>] targets adults with difficult-to-treat or severe asthma, 8.5% of whom had future asthma-related hospital encounters. The model by Loymans et al [<xref ref-type="bibr" rid="ref10">10</xref>] predicts asthma exacerbations with a prevalence rate of 13%. These 2 prevalence rates of the undesirable outcome are much higher than that in our KPSC data set. In addition, the target patient population and the prediction target of these 2 models are not comparable with those in our KPSC model. Except for these 2 models, each of the other models formerly built by others had an AUC ≤0.79, which is at least 0.030 lower than that of our KPSC model.</p>
        <p>Compared with other models, the model by Yurk et al [<xref ref-type="bibr" rid="ref17">17</xref>] gained a larger PPV and sensitivity mainly because of the use of a distinct prediction target: hospital encounters or one or more days lost because of missed work or reduced activities for asthma. This prediction target was easier to predict, as it occurred in 54% of the patients with asthma. If the model by Yurk et al [<xref ref-type="bibr" rid="ref17">17</xref>] were used to predict asthma-related hospital encounters that occurred in approximately 2% of the patients with asthma, we would expect the model to gain a lower sensitivity and PPV.</p>
        <p>Excluding the model by Yurk et al [<xref ref-type="bibr" rid="ref17">17</xref>], all of the other models formerly built by others had a sensitivity ≤49%, which is smaller than what our final KPSC model gained: 51.90% (2259/4353). Sensitivity provides, among all patients with asthma who will have future asthma-related hospital encounters, the proportion of patients that the model pinpoints. As the population of patients with asthma is large, for every 1% increase in the identified proportion of patients with asthma who would have future asthma-related hospital encounters, effective care management could help improve patient outcomes, thereby avoiding up to 7200 more ED visits and 1970 more hospitalizations in the United States annually [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>The PPV depends substantially on the prevalence rate of undesirable outcomes [<xref ref-type="bibr" rid="ref42">42</xref>]. In our KPSC test data set, 2.13% (4353/204,744) of patients with asthma had future asthma-related hospital encounters. When the top 10.00% (20,474/204,744) of patients having the largest predicted risk were adopted as the cutoff point for performing binary classification, the maximum possible PPV that a perfect model could obtain is 21.26% (4353/20,474). Our final KPSC model gained a PPV of 11.03% (2259/20,474), which is 51.90% (2259/4353) of the maximum possible PPV. In comparison, in our Intermountain Healthcare test data set, 4.22% of patients with asthma had future asthma-related hospital encounters [<xref ref-type="bibr" rid="ref23">23</xref>]. Our Intermountain Healthcare model gained a PPV of 22.65% (436/1925) [<xref ref-type="bibr" rid="ref23">23</xref>], which is 53.7% (436/812) of the maximum possible PPV that a perfect model could obtain. On a data set in which 6.5% of patients with asthma had future asthma-related hospital encounters, the model by Schatz et al [<xref ref-type="bibr" rid="ref20">20</xref>] gained a PPV of 22.0%. On a data set in which 6.9% of patients with asthma had future asthma-related hospital encounters, the model by Lieu et al [<xref ref-type="bibr" rid="ref19">19</xref>] gained a PPV of 18.5%. Except for these PPVs and the PPV of the model by Yurk et al [<xref ref-type="bibr" rid="ref17">17</xref>], none of the previously reported PPVs was more than 5.6%.</p>
        <p>Despite being built using the same modeling guideline, our final KPSC model gained a lower AUC than our Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>]. This is largely because the percentage of data instances in the test set linked to future asthma-related hospital encounters differs greatly between Intermountain Healthcare and KPSC: 4.22% (812/19,256) versus 2.13% (4353/204,744), respectively. The rarer the undesirable outcome, the harder it is to accurately predict it.</p>
        <p>The top features with an importance value ≥0.01 in our final KPSC model are similar to those in our Intermountain Healthcare model [<xref ref-type="bibr" rid="ref23">23</xref>]. In both our final KPSC and our Intermountain Healthcare models, many top features involve asthma medications and previous ED visits. When building our Intermountain Healthcare model, we did not consider several basic candidate features. They turned out to be top features in our final KPSC model and impacted the importance values and ranks of the other top features there.</p>
        <p>When building our Intermountain Healthcare model, we did not incorporate any extended candidate features. Several such features appeared as top features in our final KPSC model. Their inclusion boosted the model accuracy on our KPSC data set. It is possible that including extended candidate features could also boost the model accuracy on our Intermountain Healthcare data set. This could be explored in future work.</p>
        <p>Schatz et al [<xref ref-type="bibr" rid="ref20">20</xref>] showed that in 2 Southern California cities, 6.5% of patients with asthma at KPSC had asthma-related hospital encounters in 2000. In comparison, 2.08% (4353/208,959) of patients with asthma at KPSC had asthma-related hospital encounters in 2018. This suggests that compared with 2 decades ago, KPSC manages patients with asthma better now.</p>
      </sec>
      <sec>
        <title>Considerations About Potential Clinical Use</title>
        <p>Although more accurate than those formerly built by others, our final KPSC model still gained a somewhat low PPV of 11.03% (2259/20,474). However, our model could be clinically useful:</p>
        <list list-type="order">
          <list-item>
            <p>A PPV of 11.03% (2259/20,474) is acceptable for pinpointing high-risk patients with asthma to apply low-cost preventive interventions. Examples of such interventions include giving the patient a peak flow meter for self-monitoring at home and showing the patient how to use it, instructing the patient on the correct use of an asthma inhaler, asking a nurse to follow up on the patient with extra phone calls, and training the patient to write a diary on environmental triggers.</p>
          </list-item>
          <list-item>
            <p>As explained above, because of the low prevalence rate of the undesirable outcome used in this study, even a perfect model would gain a small PPV. For this outcome, sensitivity matters more than PPV for judging the model’s possible clinical impact. Our final KPSC model gained a higher sensitivity than all of the models that were formerly built by others and used a comparable prediction target.</p>
          </list-item>
          <list-item>
            <p>To allocate care management resources, health care systems such as the University of Washington Medicine, Kaiser Permanente Northern California [<xref ref-type="bibr" rid="ref3">3</xref>], and Intermountain Healthcare are using proprietary models whose performance measures are akin to those of the models previously built by others. Our final KPSC model is more accurate than these models.</p>
          </list-item>
        </list>
        <p>Our final KPSC model used 221 features. Cutting this number could facilitate the clinical deployment of the model. In this regard, if one could bear a small drop in prediction accuracy, one could adopt the top features having an importance value of, for example, 0.01 or more and remove the others. The importance value of a feature changes across health care systems. Ideally, before deciding which features to keep, one should first compute the importance values of the features on a data set from the intended health care system.</p>
        <p>Most of the attributes that we used to compute the features adopted in our final KPSC model, particularly the top features, are routinely collected by electronic medical record systems. For future work, to make it easy for other health care systems to reuse our final KPSC model, we can resort to the Observational Medical Outcomes Partnership (OMOP) common data model [<xref ref-type="bibr" rid="ref43">43</xref>]. This data model and its linked standardized terminologies [<xref ref-type="bibr" rid="ref44">44</xref>] standardize administrative and clinical attributes from at least 10 large US health care systems [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. We can extend this data model to include the attributes that are used in our final KPSC model but missed by the original data model. We can then rewrite our feature construction and model building code based on the extended OMOP common data model and post our code and the related data schema on a public website. After converting its data into our extended OMOP common data model format based on this data schema, a health care system can rerun our code on its data to obtain a simplified version of our final KPSC model tailored to its data. Hopefully, most of the predictive power of our final KPSC model can be retained, similar to what this study showed for our Intermountain Healthcare model.</p>
        <p>It is difficult to interpret an XGBoost model employing many features globally, as is the case with many other involved machine learning models. As an interesting topic for future work, we plan to use our previously proposed method [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>] to automatically explain our final KPSC model’s predictions for each patient with asthma.</p>
        <p>Our final KPSC model was an XGBoost model [<xref ref-type="bibr" rid="ref32">32</xref>]. When classifying 2 unbalanced classes, XGBoost employs a hyperparameter scale_pos_weight to balance their weights [<xref ref-type="bibr" rid="ref47">47</xref>]. To maximize the AUC of our KPSC model, our automatic model selection method [<xref ref-type="bibr" rid="ref37">37</xref>] changed scale_pos_weight from its default value to balance the 2 classes of having future asthma-related hospital encounters or not [<xref ref-type="bibr" rid="ref48">48</xref>]. As a side effect, this shrank the model’s projected probabilities of having future asthma-related hospital encounters to a large extent and made them differ greatly from the actual probabilities [<xref ref-type="bibr" rid="ref48">48</xref>]. This does not affect the identification of the top few percent of patients with asthma who have the largest projected risk to receive care management or other preventive interventions. We could keep scale_pos_weight at its default value of 1 and not balance the 2 classes. This would avoid the side effect but drop the model’s AUC from 0.820 to 0.817 (95% CI 0.810-0.824).</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has 3 limitations, all of which provide interesting areas for future work:</p>
        <list list-type="order">
          <list-item>
            <p>In addition to those examined in this study, other features could also help raise model accuracy. Our KPSC data set does not include some potentially relevant features, such as characteristics of the patient’s home environment and features computed on the data gathered by monitoring sensors attached to the patient’s body. It would be worthwhile to identify new predictive features from various data sources.</p>
          </list-item>
          <list-item>
            <p>Our study used only non-deep learning machine learning algorithms and structured data. Using deep learning and including features computed from unstructured clinical notes may further boost model accuracy [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref49">49</xref>].</p>
          </list-item>
          <list-item>
            <p>Our study assessed our modeling guideline’s generalizability to only one health care system. It would be interesting to evaluate our modeling guideline’s generalizability to other health care systems, such as academic health care systems that have different properties from KPSC and Intermountain Healthcare. Compared with nonacademic health care systems, academic health care systems tend to care for sicker and more complex patients [<xref ref-type="bibr" rid="ref50">50</xref>]. To perform such an evaluation, we are working on obtaining a data set of patients with asthma from the University of Washington Medicine [<xref ref-type="bibr" rid="ref49">49</xref>].</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In its first generalizability assessment, our modeling guideline of examining extensive candidate features to help boost model accuracy exhibited acceptable generalizability to KPSC. Compared with the models formerly built by others, our KPSC model for projecting asthma-related hospital encounters in patients with asthma gained a higher AUC. At present, predictive models are widely used as a core component of a decision support tool to prospectively pinpoint high-risk patients with asthma for preventive care via care management. After further enhancement, our KPSC model could be used to replace the existing predictive models in the decision support tool for better directing asthma care management to help improve patient outcomes and reduce health care costs.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>The basic candidate features, the clinical and demographic characteristics of our patient cohort, and the features employed in our final predictive model and their importance values.</p>
        <media xlink:href="medinform_v8i11e22689_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 171 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ED</term>
          <def>
            <p>emergency department</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">KPSC</term>
          <def>
            <p>Kaiser Permanente Southern California</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NPV</term>
          <def>
            <p>negative predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">OMOP</term>
          <def>
            <p>Observational Medical Outcomes Partnership</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">PCP</term>
          <def>
            <p>primary care provider</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">TN</term>
          <def>
            <p>true negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">WEKA</term>
          <def>
            <p>Waikato Environment for Knowledge Analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">XGBoost</term>
          <def>
            <p>extreme gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank Lee J Barton, Don McCarthy, Xia X Li, and Michael D Johnson for useful discussions and for helping to retrieve the KPSC data set. GL, CN, MS, RZ, ER, and CK were partially supported by the National Heart, Lung, and Blood Institute of the National Institutes of Health under award number R01HL142503. The funders had no role in the study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>GL was mainly responsible for this study. He conceptualized and designed the study, performed the literature review and data analysis, and wrote the paper. CK, CN, WC, MS, ER, and RZ provided feedback on various medical issues, contributed to conceptualizing the presentation, and revised the paper. CK and CN took part in retrieving the KPSC data set and interpreting its detected peculiarities.</p>
      </fn>
      <fn fn-type="conflict">
        <p>RZ reports grants from Aerocrine, grants and personal fees from Genentech, grants and personal fees from MedImmune of AstraZeneca, grants and personal fees from Merck, personal fees from Novartis, personal fees from Regeneron Pharmaceuticals, grants and personal fees from GlaxoSmithKline, grants from ALK Pharma, and grants from TEVA Pharmaceutical Industries Ltd outside this study.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moorman</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Akinbami</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Zahran</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>National surveillance of asthma: United States, 2001-2010</article-title>
          <source>Vital Health Stat 3</source>
          <year>2012</year>
          <month>11</month>
          <issue>35</issue>
          <fpage>1</fpage>
          <lpage>58</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/nchs/data/series/sr_03/sr03_035.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">24252609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nurmagambetov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kuwahara</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Garbe</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The economic burden of asthma in the United States, 2008-2013</article-title>
          <source>Ann Am Thorac Soc</source>
          <year>2018</year>
          <month>03</month>
          <volume>15</volume>
          <issue>3</issue>
          <fpage>348</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.1513/AnnalsATS.201703-259OC</pub-id>
          <pub-id pub-id-type="medline">29323930</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lieu</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Quesenberry</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Sorel</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Mendoza</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Leong</surname>
              <given-names>AB</given-names>
            </name>
          </person-group>
          <article-title>Computer-based models to identify high-risk children with asthma</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>1998</year>
          <month>04</month>
          <volume>157</volume>
          <issue>4 Pt 1</issue>
          <fpage>1173</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1164/ajrccm.157.4.9708124</pub-id>
          <pub-id pub-id-type="medline">9563736</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mays</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Claxton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Managed care rebound? Recent changes in health plans' cost containment strategies</article-title>
          <source>Health Aff (Millwood)</source>
          <year>2004</year>
          <volume>Suppl Web Exclusives</volume>
          <fpage>W4</fpage>
          <lpage>427</lpage>
          <pub-id pub-id-type="doi">10.1377/hlthaff.w4.427</pub-id>
          <pub-id pub-id-type="medline">15451964</pub-id>
          <pub-id pub-id-type="pii">hlthaff.w4.427</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caloyeras</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Exum</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Broderick</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mattke</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Managing manifest diseases, but not health risks, saved PepsiCo money over seven years</article-title>
          <source>Health Aff (Millwood)</source>
          <year>2014</year>
          <month>01</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>124</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1377/hlthaff.2013.0625</pub-id>
          <pub-id pub-id-type="medline">24395944</pub-id>
          <pub-id pub-id-type="pii">33/1/124</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greineder</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Loane</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Parks</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A randomized controlled trial of a pediatric asthma outreach program</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>1999</year>
          <month>03</month>
          <volume>103</volume>
          <issue>3 Pt 1</issue>
          <fpage>436</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1016/s0091-6749(99)70468-9</pub-id>
          <pub-id pub-id-type="medline">10069877</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(99)70468-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Morrow</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Shults</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nakas</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Strope</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Adelman</surname>
              <given-names>RD</given-names>
            </name>
          </person-group>
          <article-title>Outcomes evaluation of a comprehensive intervention program for asthmatic children enrolled in Medicaid</article-title>
          <source>Pediatrics</source>
          <year>2000</year>
          <month>05</month>
          <volume>105</volume>
          <issue>5</issue>
          <fpage>1029</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1542/peds.105.5.1029</pub-id>
          <pub-id pub-id-type="medline">10790458</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Axelrod</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Zimbro</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Chetney</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Sabol</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ainsworth</surname>
              <given-names>VJ</given-names>
            </name>
          </person-group>
          <article-title>A disease management program utilizing life coaches for children with asthma</article-title>
          <source>J Clin Outcomes Manag</source>
          <year>2001</year>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>38</fpage>
          <lpage>42</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/284394600_A_disease_management_program_utilising_life_coaches_for_children_with_asthma"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Axelrod</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Predictive modeling in health plans</article-title>
          <source>Dis Manag Health Outcomes</source>
          <year>2003</year>
          <volume>11</volume>
          <issue>12</issue>
          <fpage>779</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.2165/00115677-200311120-00003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loymans</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Honkoop</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Termeer</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Snoeck-Stroband</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Assendelft</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Schermer</surname>
              <given-names>TRJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>KF</given-names>
            </name>
            <name name-style="western">
              <surname>Sousa</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Sterk</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Reddel</surname>
              <given-names>HK</given-names>
            </name>
            <name name-style="western">
              <surname>Sont</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Ter Riet</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Identifying patients at risk for severe exacerbations of asthma: development and external validation of a multivariable prediction model</article-title>
          <source>Thorax</source>
          <year>2016</year>
          <month>09</month>
          <volume>71</volume>
          <issue>9</issue>
          <fpage>838</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1136/thoraxjnl-2015-208138</pub-id>
          <pub-id pub-id-type="medline">27044486</pub-id>
          <pub-id pub-id-type="pii">thoraxjnl-2015-208138</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schatz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>EF</given-names>
            </name>
            <name name-style="western">
              <surname>Joshua</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Petitti</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Risk factors for asthma hospitalizations in a managed care organization: development of a clinical prediction rule</article-title>
          <source>Am J Manag Care</source>
          <year>2003</year>
          <month>08</month>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>538</fpage>
          <lpage>47</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ajmc.com/pubMed.php?pii=2500"/>
          </comment>
          <pub-id pub-id-type="medline">12921231</pub-id>
          <pub-id pub-id-type="pii">2500</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eisner</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Yegin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Trzaskoma</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Severity of asthma score predicts clinical outcomes in patients with moderate to severe persistent asthma</article-title>
          <source>Chest</source>
          <year>2012</year>
          <month>01</month>
          <volume>141</volume>
          <issue>1</issue>
          <fpage>58</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1378/chest.11-0020</pub-id>
          <pub-id pub-id-type="medline">21885725</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(12)60014-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sato</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tomita</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sano</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ichihashi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yamagata</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sano</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yamagata</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Miyara</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Iwanaga</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Muraki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tohda</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>The strategy for predicting future exacerbation of asthma using a combination of the asthma control test and lung function test</article-title>
          <source>J Asthma</source>
          <year>2009</year>
          <month>09</month>
          <volume>46</volume>
          <issue>7</issue>
          <fpage>677</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.1080/02770900902972160</pub-id>
          <pub-id pub-id-type="medline">19728204</pub-id>
          <pub-id pub-id-type="pii">914289288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Osborne</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Pedula</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>O'Hollaren</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ettinger</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Stibolt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Buist</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Vollmer</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Assessing future need for acute care in adult asthmatics: the profile of asthma risk study: a prospective health maintenance organization-based study</article-title>
          <source>Chest</source>
          <year>2007</year>
          <month>10</month>
          <volume>132</volume>
          <issue>4</issue>
          <fpage>1151</fpage>
          <lpage>61</lpage>
          <pub-id pub-id-type="doi">10.1378/chest.05-3084</pub-id>
          <pub-id pub-id-type="medline">17573515</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(15)36707-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Blanc</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Pasta</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gujrathi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Barron</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wenzel</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>ST</given-names>
            </name>
            <collab>TENOR Study Group</collab>
          </person-group>
          <article-title>TENOR risk score predicts healthcare in adults with severe or difficult-to-treat asthma</article-title>
          <source>Eur Respir J</source>
          <year>2006</year>
          <month>12</month>
          <volume>28</volume>
          <issue>6</issue>
          <fpage>1145</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1183/09031936.06.00145105</pub-id>
          <pub-id pub-id-type="medline">16870656</pub-id>
          <pub-id pub-id-type="pii">09031936.06.00145105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Markson</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Allen-Ramey</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Vollmer</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Using an asthma control questionnaire and administrative data to predict health-care utilization</article-title>
          <source>Chest</source>
          <year>2006</year>
          <month>04</month>
          <volume>129</volume>
          <issue>4</issue>
          <fpage>918</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1378/chest.129.4.918</pub-id>
          <pub-id pub-id-type="medline">16608939</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(15)38806-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yurk</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Diette</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Skinner</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Dominici</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Steinwachs</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>AW</given-names>
            </name>
          </person-group>
          <article-title>Predicting patient-reported asthma outcomes for adults in managed care</article-title>
          <source>Am J Manag Care</source>
          <year>2004</year>
          <month>05</month>
          <volume>10</volume>
          <issue>5</issue>
          <fpage>321</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ajmc.com/pubMed.php?pii=2600"/>
          </comment>
          <pub-id pub-id-type="medline">15152702</pub-id>
          <pub-id pub-id-type="pii">2600</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loymans</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Debray</surname>
              <given-names>TP</given-names>
            </name>
            <name name-style="western">
              <surname>Honkoop</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Termeer</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Snoeck-Stroband</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Schermer</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Assendelft</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Timp</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>KF</given-names>
            </name>
            <name name-style="western">
              <surname>Sousa</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Sont</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Sterk</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Reddel</surname>
              <given-names>HK</given-names>
            </name>
            <name name-style="western">
              <surname>Ter Riet</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Exacerbations in adults with asthma: a systematic review and external validation of prediction models</article-title>
          <source>J Allergy Clin Immunol Pract</source>
          <year>2018</year>
          <volume>6</volume>
          <issue>6</issue>
          <fpage>1942</fpage>
          <lpage>52.e15</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaip.2018.02.004</pub-id>
          <pub-id pub-id-type="medline">29454163</pub-id>
          <pub-id pub-id-type="pii">S2213-2198(18)30096-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lieu</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Capra</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Quesenberry</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Mendoza</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Mazar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Computer-based models to identify high-risk adults with asthma: is the glass half empty of half full?</article-title>
          <source>J Asthma</source>
          <year>1999</year>
          <month>06</month>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>359</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.3109/02770909909068229</pub-id>
          <pub-id pub-id-type="medline">10386500</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schatz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nakahiro</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Joshua</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Petitti</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Asthma population management: development and validation of a practical 3-level risk stratification scheme</article-title>
          <source>Am J Manag Care</source>
          <year>2004</year>
          <month>01</month>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>25</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ajmc.com/pubMed.php?pii=2474"/>
          </comment>
          <pub-id pub-id-type="medline">14738184</pub-id>
          <pub-id pub-id-type="pii">2474</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grana</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Preston</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Hanchak</surname>
              <given-names>NA</given-names>
            </name>
          </person-group>
          <article-title>The use of administrative data to risk-stratify asthmatic patients</article-title>
          <source>Am J Med Qual</source>
          <year>1997</year>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>113</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1177/0885713X9701200205</pub-id>
          <pub-id pub-id-type="medline">9161058</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forno</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fuhlbrigge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Soto-Quirós</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Avila</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Raby</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Brehm</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sylvia</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Celedón</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Risk factors and predictive clinical scores for asthma exacerbations in childhood</article-title>
          <source>Chest</source>
          <year>2010</year>
          <month>11</month>
          <volume>138</volume>
          <issue>5</issue>
          <fpage>1156</fpage>
          <lpage>65</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20472862"/>
          </comment>
          <pub-id pub-id-type="doi">10.1378/chest.09-2426</pub-id>
          <pub-id pub-id-type="medline">20472862</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(10)60593-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC2972623</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Nkoy</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>MD</given-names>
            </name>
          </person-group>
          <article-title>Developing a model to predict hospital encounters for asthma in asthmatic patients: secondary analysis</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>01</month>
          <day>21</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>e16080</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/1/e16080/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16080</pub-id>
          <pub-id pub-id-type="medline">31961332</pub-id>
          <pub-id pub-id-type="pii">v8i1e16080</pub-id>
          <pub-id pub-id-type="pmcid">PMC7001050</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mayfield</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McNamee</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Piatko</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Named Entity Recognition Using Hundreds of Thousands of Features</article-title>
          <source>Proceedings of the Seventh Conference on Natural Language Learning</source>
          <year>2003</year>
          <conf-name>CoNLL'03</conf-name>
          <conf-date>May 31-June 1, 2003</conf-date>
          <conf-loc>Edmonton, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.3115/1119176.1119205</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Abbott</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Zavala</surname>
              <given-names>VM</given-names>
            </name>
          </person-group>
          <article-title>Machine learning algorithms for liquid crystal-based sensors</article-title>
          <source>ACS Sens</source>
          <year>2018</year>
          <month>11</month>
          <day>26</day>
          <volume>3</volume>
          <issue>11</issue>
          <fpage>2237</fpage>
          <lpage>45</lpage>
          <pub-id pub-id-type="doi">10.1021/acssensors.8b00100</pub-id>
          <pub-id pub-id-type="medline">30289249</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tsang</surname>
              <given-names>IW</given-names>
            </name>
          </person-group>
          <article-title>The emerging 'big dimensionality'</article-title>
          <source>IEEE Comput Intell Mag</source>
          <year>2014</year>
          <month>08</month>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>14</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.1109/mci.2014.2326099</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hansson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yella</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dougherty</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fleyeh</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Machine learning algorithms in heavy process manufacturing</article-title>
          <source>Am J Intell Syst</source>
          <year>2016</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.5923/j.ajis.20160601.01</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koebnick</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Langer-Gould</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Gould</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Chao</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Iyer</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Sociodemographic characteristics of members of a large, integrated health care system: comparison with US Census Bureau data</article-title>
          <source>Perm J</source>
          <year>2012</year>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>37</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23012597"/>
          </comment>
          <pub-id pub-id-type="doi">10.7812/tpp/12-031</pub-id>
          <pub-id pub-id-type="medline">23012597</pub-id>
          <pub-id pub-id-type="pmcid">PMC3442759</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Desai</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nichols</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Lieu</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Diabetes and asthma case identification, validation, and representativeness when using electronic health data to construct registries for comparative effectiveness and epidemiologic research</article-title>
          <source>Med Care</source>
          <year>2012</year>
          <month>07</month>
          <volume>50 Suppl</volume>
          <fpage>S30</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22692256"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MLR.0b013e318259c011</pub-id>
          <pub-id pub-id-type="medline">22692256</pub-id>
          <pub-id pub-id-type="pii">00005650-201207001-00009</pub-id>
          <pub-id pub-id-type="pmcid">PMC4671076</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wakefield</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Cloutier</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Modifications to HEDIS and CSTE algorithms improve case recognition of pediatric asthma</article-title>
          <source>Pediatr Pulmonol</source>
          <year>2006</year>
          <month>10</month>
          <volume>41</volume>
          <issue>10</issue>
          <fpage>962</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1002/ppul.20476</pub-id>
          <pub-id pub-id-type="medline">16871628</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Andrews</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Simpson</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Basco</surname>
              <given-names>WT</given-names>
            </name>
            <name name-style="western">
              <surname>Teufel</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Asthma medication ratio predicts emergency department visits and hospitalizations in children with asthma</article-title>
          <source>Medicare Medicaid Res Rev</source>
          <year>2013</year>
          <volume>3</volume>
          <issue>4</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24834366"/>
          </comment>
          <pub-id pub-id-type="doi">10.5600/mmrr.003.04.a05</pub-id>
          <pub-id pub-id-type="medline">24834366</pub-id>
          <pub-id pub-id-type="pii">mmrr2013-003-04-a05</pub-id>
          <pub-id pub-id-type="pmcid">PMC4011648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: A Scalable Tree Boosting System</article-title>
          <source>Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2016</year>
          <conf-name>KDD'16</conf-name>
          <conf-date>August 13-17, 2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>The Elements of Statistical Learning: Data Mining, Inference, and Prediction. Second Edition</source>
          <year>2016</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
          </person-group>
          <source>Clinical Prediction Models: A Practical Approach to Development, Validation, and Updating. Second Edition</source>
          <year>2019</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Witten</surname>
              <given-names>IH</given-names>
            </name>
            <name name-style="western">
              <surname>Frank</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Pal</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <source>Data Mining: Practical Machine Learning Tools and Techniques. Fourth Edition</source>
          <year>2016</year>
          <publisher-loc>Burlington, MA</publisher-loc>
          <publisher-name>Morgan Kaufmann</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <source>XGBoost JVM package</source>
          <year>2020</year>
          <access-date>2020-10-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://xgboost.readthedocs.io/en/latest/jvm/index.html">https://xgboost.readthedocs.io/en/latest/jvm/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Progressive sampling-based Bayesian optimization for efficient and automatic machine learning model selection</article-title>
          <source>Health Inf Sci Syst</source>
          <year>2017</year>
          <month>12</month>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>2</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29038732"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13755-017-0023-z</pub-id>
          <pub-id pub-id-type="medline">29038732</pub-id>
          <pub-id pub-id-type="pii">23</pub-id>
          <pub-id pub-id-type="pmcid">PMC5617811</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thornton</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hutter</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hoos</surname>
              <given-names>HH</given-names>
            </name>
            <name name-style="western">
              <surname>Leyton-Brown</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Auto-WEKA: Combined Selection and Hyperparameter Optimization of Classification Algorithms</article-title>
          <source>Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2013</year>
          <conf-name>KDD'13</conf-name>
          <conf-date>August 11-14, 2013</conf-date>
          <conf-loc>Chicago, IL</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2487575.2487629</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agresti</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Categorical Data Analysis. Third Edition</source>
          <year>2012</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>Wiley</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Automatically explaining machine learning prediction results: a demonstration on type 2 diabetes risk prediction</article-title>
          <source>Health Inf Sci Syst</source>
          <year>2016</year>
          <volume>4</volume>
          <fpage>2</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26958341"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13755-016-0015-4</pub-id>
          <pub-id pub-id-type="medline">26958341</pub-id>
          <pub-id pub-id-type="pii">15</pub-id>
          <pub-id pub-id-type="pmcid">PMC4782293</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A roadmap for semi-automatically extracting predictive and clinically meaningful temporal features from medical data for predictive modeling</article-title>
          <source>Glob Transit</source>
          <year>2019</year>
          <volume>1</volume>
          <fpage>61</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31032483"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.glt.2018.11.001</pub-id>
          <pub-id pub-id-type="medline">31032483</pub-id>
          <pub-id pub-id-type="pmcid">PMC6482973</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ranganathan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Common pitfalls in statistical analysis: understanding the properties of diagnostic tests – part 1</article-title>
          <source>Perspect Clin Res</source>
          <year>2018</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>40</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.4103/picr.picr_170_17</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <source>Observational Medical Outcomes Partnership (OMOP) Common Data Model</source>
          <year>2020</year>
          <access-date>2020-10-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://omop.org/CDM">http://omop.org/CDM</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <article-title>Vocabularies</article-title>
          <source>Observational Medical Outcomes Partnership (OMOP)</source>
          <year>2020</year>
          <access-date>2020-10-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://omop.org/Vocabularies">http://omop.org/Vocabularies</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Huser</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Suchard</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>ICK</given-names>
            </name>
            <name name-style="western">
              <surname>Rijnbeek</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>van der Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Norén</surname>
              <given-names>GN</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Stang</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Madigan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
          </person-group>
          <article-title>Observational health data sciences and informatics (OHDSI): opportunities for observational researchers</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2015</year>
          <volume>216</volume>
          <fpage>574</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26262116"/>
          </comment>
          <pub-id pub-id-type="medline">26262116</pub-id>
          <pub-id pub-id-type="pmcid">PMC4815923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Overhage</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Hartzema</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Stang</surname>
              <given-names>PE</given-names>
            </name>
          </person-group>
          <article-title>Validation of a common data model for active safety surveillance research</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>54</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22037893"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000376</pub-id>
          <pub-id pub-id-type="medline">22037893</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000376</pub-id>
          <pub-id pub-id-type="pmcid">PMC3240764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <article-title>Parameters</article-title>
          <source>XGBoost</source>
          <year>2020</year>
          <access-date>2020-10-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://xgboost.readthedocs.io/en/latest/parameter.html">https://xgboost.readthedocs.io/en/latest/parameter.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <article-title>Notes on Parameter Tuning</article-title>
          <source>XGBoost</source>
          <year>2020</year>
          <access-date>2020-10-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html">https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Koebnick</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Au</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Sheng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Murtaugh</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Sward</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Schatz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zeiger</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Davidson</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Nkoy</surname>
              <given-names>FL</given-names>
            </name>
          </person-group>
          <article-title>Using temporal features to provide data-driven clinical early warnings for chronic obstructive pulmonary disease and asthma care management: protocol for a secondary analysis</article-title>
          <source>JMIR Res Protoc</source>
          <year>2019</year>
          <month>06</month>
          <day>6</day>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>e13783</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchprotocols.org/2019/6/e13783/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13783</pub-id>
          <pub-id pub-id-type="medline">31199308</pub-id>
          <pub-id pub-id-type="pii">v8i6e13783</pub-id>
          <pub-id pub-id-type="pmcid">PMC6592592</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Forgione</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Younis</surname>
              <given-names>MZ</given-names>
            </name>
          </person-group>
          <article-title>A comparative analysis of the CVP structure of nonprofit teaching and for-profit non-teaching hospitals</article-title>
          <source>J Health Care Finance</source>
          <year>2012</year>
          <volume>39</volume>
          <issue>1</issue>
          <fpage>12</fpage>
          <lpage>38</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/233533296_A_comparative_analysis_of_the_CVP_structure_of_nonprofit_teaching_and_for-profit_non-teaching_hospitals"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
