<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i6e15431</article-id>
      <article-id pub-id-type="pmid">32554386</article-id>
      <article-id pub-id-type="doi">10.2196/15431</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Ensemble Learning Models Based on Noninvasive Features for Type 2 Diabetes Screening: Model Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Öberg</surname>
            <given-names>Ulrika</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chao</surname>
            <given-names>Dyna</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Van halteren</surname>
            <given-names>Aart</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Santos</surname>
            <given-names>Jéssica</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Tianzhou</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9581-2498</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Li</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5029-2134</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Yi</surname>
            <given-names>Liwei</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6539-5662</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Feng</surname>
            <given-names>Huawei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3494-4590</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Shimeng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7303-3987</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Haoyu</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4614-0661</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Zhu</surname>
            <given-names>Junfeng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5845-0969</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Jian</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9607-2337</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>Yingyue</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3808-4425</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Hongsheng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>School of Life Science</institution>
            <institution>Liaoning University</institution>
            <addr-line>No. 66, Chongshan Middle road</addr-line>
            <addr-line>Shenyang, 110036</addr-line>
            <country>China</country>
            <phone>86 024 62202280</phone>
            <fax>86 024 62202280</fax>
            <email>liuhongsheng@lnu.edu.cn</email>
          </address>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9242-6508</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Life Science</institution>
        <institution>Liaoning University</institution>
        <addr-line>Shenyang</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Information</institution>
        <institution>Liaoning University</institution>
        <addr-line>Shenyang</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Research Center for Computer Simulating and Information Processing of Bio-macromolecules of Shenyang</institution>
        <institution>Liaoning University</institution>
        <addr-line>Shenyang</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Engineering Laboratory for Molecular Simulation and Designing of Drug Molecules of Liaoning</institution>
        <addr-line>Shenyang</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hongsheng Liu <email>liuhongsheng@lnu.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>6</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>6</issue>
      <elocation-id>e15431</elocation-id>
      <history>
        <date date-type="received">
          <day>10</day>
          <month>7</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>11</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>22</day>
          <month>12</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>7</day>
          <month>2</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Tianzhou Yang, Li Zhang, Liwei Yi, Huawei Feng, Shimeng Li, Haoyu Chen, Junfeng Zhu, Jian Zhao, Yingyue Zeng, Hongsheng Liu. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 18.06.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2020/6/e15431" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Early diabetes screening can effectively reduce the burden of disease. However, natural population–based screening projects require a large number of resources. With the emergence and development of machine learning, researchers have started to pursue more flexible and efficient methods to screen or predict type 2 diabetes.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to build prediction models based on the ensemble learning method for diabetes screening to further improve the health status of the population in a noninvasive and inexpensive manner.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The dataset for building and evaluating the diabetes prediction model was extracted from the National Health and Nutrition Examination Survey from 2011-2016. After data cleaning and feature selection, the dataset was split into a training set (80%, 2011-2014), test set (20%, 2011-2014) and validation set (2015-2016). Three simple machine learning methods (linear discriminant analysis, support vector machine, and random forest) and easy ensemble methods were used to build diabetes prediction models. The performance of the models was evaluated through 5-fold cross-validation and external validation. The Delong test (2-sided) was used to test the performance differences between the models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We selected 8057 observations and 12 attributes from the database. In the 5-fold cross-validation, the three simple methods yielded highly predictive performance models with areas under the curve (AUCs) over 0.800, wherein the ensemble methods significantly outperformed the simple methods. When we evaluated the models in the test set and validation set, the same trends were observed. The ensemble model of linear discriminant analysis yielded the best performance, with an AUC of 0.849, an accuracy of 0.730, a sensitivity of 0.819, and a specificity of 0.709 in the validation set.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study indicates that efficient screening using machine learning methods with noninvasive tests can be applied to a large population and achieve the objective of secondary prevention.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>type 2 diabetes</kwd>
        <kwd>screening</kwd>
        <kwd>non-invasive attributes</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Diabetes is a heterogeneous metabolic disorder that is characterized by the presence of hyperglycemia due to impairment of insulin secretion, defective insulin action, or both [<xref ref-type="bibr" rid="ref1">1</xref>]. The high blood glucose level caused by diabetes not only affects the heart, eyes, kidneys, and nerves but also is associated with increased rates of cancer, physical and cognitive disabilities [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>], tuberculosis [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], and depression [<xref ref-type="bibr" rid="ref7">7</xref>]; these conditions are associated with high health care costs [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. For patients with type 2 diabetes, the risks of death and cardiovascular events are 2-4 times greater than in the general population [<xref ref-type="bibr" rid="ref10">10</xref>]. Due to the aging population, lifestyle changes, and interrelated rapid unplanned urbanization, the prevalence of diabetes is quickly increasing worldwide [<xref ref-type="bibr" rid="ref11">11</xref>]. According to the latest International Diabetes Federation Diabetes Atlas, there were approximately 420 million people aged 20-79 years with diabetes worldwide in 2017, and this number is expected to rise to 629 million in 2045. Furthermore, approximately 50% of diabetes patients are undiagnosed [<xref ref-type="bibr" rid="ref12">12</xref>]. Patients with type 2 diabetes who are within target ranges for 5 risk factor variables, namely glycated hemoglobin levels, systolic and diastolic blood pressure, albuminuria, smoking, and low-density lipoprotein cholesterol levels, appear to have little or no excess risk of death, myocardial infarction, or stroke compared with the general population [<xref ref-type="bibr" rid="ref13">13</xref>]. 
Therefore, developing an appropriate method to screen people without clinical symptoms is necessary and practical; such a screening method could reduce health care costs and patient mortality and improve patients’ quality of life through earlier clinic-based management.</p>
      <p>Generally, traditional screening projects are based on studies in epidemiology, such as the ADDITION trial study [<xref ref-type="bibr" rid="ref14">14</xref>] and the Ely study [<xref ref-type="bibr" rid="ref15">15</xref>]. These screening studies cost hundreds of thousands of dollars and require the collaboration of many people. With the emergence and development of machine learning, researchers have started to pursue more flexible and efficient methods to screen or predict type 2 diabetes. Han et al [<xref ref-type="bibr" rid="ref16">16</xref>] trained a type 2 diabetes diagnosis model with features mainly consisting of blood tests such as hemoglobin A1<sub>c</sub> and total cholesterol, yielding a precision of 0.942 and a recall of 0.939. Maniruzzaman et al [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] trained a type 2 diabetes prediction model using Pima Indian data with plasma glucose features; they obtained an accuracy of 81.97% and an area under the curve (AUC) of 0.93. A machine learning–based framework was also developed to identify patients with type 2 diabetes in the clinic with electronic health records, showing an AUC of 0.98 with more than 110 clinical features [<xref ref-type="bibr" rid="ref19">19</xref>]. Zou et al [<xref ref-type="bibr" rid="ref20">20</xref>] used principal component analysis and minimum redundancy maximum relevance to reduce the dimensionality and achieve the best accuracy in their model (0.81) in addition to using fasting blood sugar as the main feature. Many of the abovementioned studies achieved high prediction performance with blood tests; however, none of them used only noninvasive attributes to predict type 2 diabetes. 
Chung et al [<xref ref-type="bibr" rid="ref21">21</xref>] developed a model to screen prediabetes using support vector machines with only noninvasive features, such as age, sex, and family history of diabetes, and they obtained an AUC of 0.76 in the external test data; however, further exploration and optimization are needed to improve type 2 diabetes screening models that only use noninvasive features.</p>
      <p>To better screen potential patients with type 2 diabetes, further delay disease progression, control related complications, and improve human health, we built type 2 diabetes screening machine learning models and their corresponding easy ensemble models based on the National Health and Nutrition Examination Survey (NHANES) database. These models require only an individual noninvasive test, combined with data from body measurements and questionnaires, to predict type 2 diabetes, thus avoiding blood tests and clinic visits. Inexpensive screening of people who have type 2 diabetes without obvious symptoms may lead to secondary prevention.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Analysis</title>
        <p>The data were analyzed with R version 3.3.1 for Linux with the R packages dplyr, caret (Classification And REgression Training) [<xref ref-type="bibr" rid="ref22">22</xref>], randomForest [<xref ref-type="bibr" rid="ref23">23</xref>], pROC [<xref ref-type="bibr" rid="ref24">24</xref>], e1071 [<xref ref-type="bibr" rid="ref25">25</xref>], gplots, unbalanced [<xref ref-type="bibr" rid="ref26">26</xref>], epiDisplay, and MASS. The Delong test for 2 correlated receiver operating characteristic (ROC) curves was used to determine the effects of the easy ensemble methods; a <italic>P</italic> value &#60;0.05 was considered significant (2-sided). The work protocol consisted of 5 steps: data cleaning, sample selection, feature selection, model training, and validation.</p>
      </sec>
      <sec>
        <title>Data</title>
        <p>The data were obtained from the NHANES database. The detailed steps of data cleaning and feature selection are shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. First, before all the NHANES data were processed, the database contained 25,054 samples from 2011 to 2016 with 985 features. Second, data samples with missing observations for baseline variables, such as blood glucose, age, sex, height, and weight, were removed. Third, 3 new variables were computed, namely diabetes (whether a person has diabetes: 1=yes, 0=no), hypertension (whether a person has hypertension: 1=yes, 0=no) and relative leg length. The case group was defined as having fasting blood glucose levels ≥7.0 millimoles per liter, and the fasting blood glucose levels in the control group were &#60;6.1 mmol/L [<xref ref-type="bibr" rid="ref1">1</xref>]. Hypertension was defined according to the American Heart Association criteria as systolic blood pressure ≥130 millimeters of mercury or diastolic blood pressure ≥80 mm Hg obtained on more than 2 occasions [<xref ref-type="bibr" rid="ref27">27</xref>]. The relative leg length was the ratio of the upper leg length to the height multiplied by 100 [<xref ref-type="bibr" rid="ref28">28</xref>]. Fourth and fifth, we set the inclusion and exclusion criteria to control for bias. The inclusion criteria were as follows: patients aged 18-80 years from the case and control groups. The following exclusion criteria were employed: patients with cancer, due to the positive association between hyperglycemia and cancer [<xref ref-type="bibr" rid="ref29">29</xref>], and patients with liver conditions, because liver conditions can also influence blood glucose levels [<xref ref-type="bibr" rid="ref30">30</xref>]. These individuals were excluded because they are traditionally asymptomatic and their blood glucose levels are not representative of the study population. 
After the data processing steps (1-5), 10,710 observations and 988 features without type 2 diabetes were left for analysis.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The data cleaning and feature selection process. Note that the feature selection process was run only in the NHANES 2011-2014 dataset. n: number of cases. p: number of features.</p>
          </caption>
          <graphic xlink:href="medinform_v8i6e15431_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Feature Selection</title>
        <p>The selection of features is one of the most critical steps in model building. Thus, additional feature selection steps were taken. First, because only noninvasive features were used, the laboratory variables were deleted, and 756 features were left. Secondly, we used the <italic>t</italic> test to select continuous variables and the chi-square test to select the categorical variables for crude feature selection with <italic>P</italic>&#60;.05; this resulted in 270 remaining features. Third, the variables whose missing values were greater than 10% were removed, leaving 204 features. Fourth, the missing and biased values (including answers in the questionnaire such as “refused” and “don’t know”) were deleted, leaving 8057 samples. Finally, forward conditional logistic regression was employed to further filter the features that were selected in the former steps with <italic>P</italic>&#60;.05 only in the NHANES 2011-2014 dataset. After the feature selection process, 12 features remained. We separated the final dataset into three parts: the training set (80%, 2011-2014) with 3582 negative and 664 positive observations, the test set (20%, 2011-2014) with 895 negative and 165 positive observations, and the external validation set (2015-2016) with 2244 negative and 507 positive observations; the whole 2011-2014 data set was randomly divided into the training set and test set using the createDataPartition function in the caret package [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      </sec>
      <sec>
        <title>Machine Learning and the Easy Ensemble Method</title>
        <p>In this study, binary logistic regression was used to select the risk factors for diabetes, and the linear discriminant analysis, random forest, and support vector machine methods as well as their ensemble methods were developed to classify the case and control groups according to the selected features. The linear discriminant analysis structure was based on the lda function of the R package MASS, the support vector machine structure was based on the svm function of the R package e1071, and the random forest structure was based on the randomForest function of the R package randomForest. The parameter adjustments of the support vector machine and random forest were applied with the R package caret. We used 80% of the 2011-2014 NHANES data for model training under 100 repeated 5-fold cross-validations. The remaining 20% of the 2011-2014 NHANES data were used as the test set, and the 2015-2016 NHANES data were reserved as the validation set for performance measurement.</p>
        <sec>
          <title>Logistic Regression</title>
          <p>As an extension of linear regression, logistic regression is a commonly used method to obtain the risk or protective factors for disease in epidemiology [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. According to the experimental design, logistic regression is divided into unconditional and conditional logistic regression; according to the type of dependent variable, it is divided into binary logistic regression and multiple logistic regression. Logistic regression is an effective method for classification problems and gives the odds ratio (OR) of each significant variable with respect to the dependent variable.</p>
          <p>In this study, binary unconditional logistic regression was used to select the risk factors for, or features related to, diabetes. In the logistic regression, the 204 attributes chosen from the <italic>t</italic> test and chi-square test were considered as the independent variables, and whether a person has diabetes was the dependent variable. Twelve features were left.</p>
        </sec>
        <sec>
          <title>Linear Discriminant Analysis</title>
          <p>Linear discriminant analysis was first introduced by Fisher [<xref ref-type="bibr" rid="ref33">33</xref>] in 1936 to address taxonomic problems. Generally, it is a combination of analysis of variance and regression analysis. Linear discriminant analysis is based on the theory of transformation from high dimensions to low dimensions. As a classification algorithm, its theoretical basis is that the projection points of each type of data are as close to each other as possible, while the projection points of different types of data are as far apart as possible. In this case, the classification was based on whether a person has diabetes. Therefore, the linear discriminant analysis reduced the 12 features to 1 dimension (k–1, where k=2) to discriminate patients with diabetes.</p>
        </sec>
        <sec>
          <title>Random Forest</title>
          <p>Random forest, which is based on decision trees [<xref ref-type="bibr" rid="ref34">34</xref>], is a well-known ensemble learning method that uses the bagging method [<xref ref-type="bibr" rid="ref35">35</xref>]. The basic theory of the bagging method is as follows: assuming a dataset contains N observations, for example, 100 subsets can be extracted wherein every subset comprises n (n=N) observations that were sampled randomly with replacement from the original dataset, and 100 base classifiers can be built with these 100 subsets to vote for the classification of every sample in the dataset. The decision trees are the base classifier in the bagging method in the random forest. This basic algorithm can be considered as a single tree model with if-then structures. Each decision tree of the random forest yields its own classification outcome and “vote,” and the average of all the results is the final classification.</p>
          <p>The caret package in R was applied to search for the best parameter in the random forest with 5-fold cross-validation repeated 100 times. The number of trees was 500, and the best number of variables randomly sampled as candidates at each split was 4 after the parameter selection.</p>
        </sec>
        <sec>
          <title>Support Vector Machine</title>
          <p>Support vector machines [<xref ref-type="bibr" rid="ref36">36</xref>] are among the most popular supervised learning techniques in the machine learning field. A support vector machine maps the data to a higher-dimensional space with a kernel function. The classification task relies on a subset of the training data, which are called support vectors. For general 2-class problems, the observations are separated by a hyperplane that maximizes the margin to the nearest support vectors.</p>
          <p>In this study, the radial basis kernel was chosen. The caret package of R was also used to match the parameter with the best AUC performance in the support vector machine model with 5-fold cross-validation repeated 100 times. The optimal cost and gamma parameter values obtained for the model were 0.137 and 0.012, respectively.</p>
        </sec>
        <sec>
          <title>Easy Ensemble Method</title>
          <p>Type 2 diabetes screening is an unbalanced problem because there are fewer patients than healthy individuals. To address the unbalanced issue, we employed the easy ensemble method [<xref ref-type="bibr" rid="ref37">37</xref>]. In short, within the training set, we randomly sampled from the negative observations as many observations as there were positive observations and combined this sample with all the positive observations to form a balanced minor dataset. We then repeated the above step 100 times to generate 100 minor datasets. Next, we built 100 same-method models based on these datasets. Furthermore, for 5-fold cross-validation, the prevalence probability of every sample was averaged by these 100 models in every validation for both the test set and validation set.</p>
        </sec>
      </sec>
      <sec>
        <title>Model Evaluation</title>
        <p>In this article, we used the ROC curve, AUC, sensitivity, specificity, accuracy, and positive predictive value (PPV) to measure the performance of the models. The cutoff value was selected based on the maximal value of the Youden index [<xref ref-type="bibr" rid="ref38">38</xref>] in the training set.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>After the data cleaning and feature selection process, the dataset included 8057 cases that were divided into three sets: 80% of the NHANES 2011-2014 data for the training set, 20% of the NHANES 2011-2014 data for the test set, and the NHANES 2015-2016 data for the validation set. After crude feature selection with the <italic>t</italic> test and chi-square test in the 2011-2014 NHANES dataset, logistic regression analysis was further performed to assess the related factors of type 2 diabetes; this process was intended to limit overfitting and support generalization of the model to future patients. The 12 selected factors are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Factors associated with diabetes used to build the models.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="350"/>
          <col width="260"/>
          <col width="260"/>
          <col width="130"/>
          <thead>
            <tr valign="top">
              <td>
                Feature
              </td>
              <td>Crude<sup>a</sup> OR<sup>b</sup> (95% CI)</td>
              <td>Adjusted<sup>c</sup> OR (95% CI)</td>
              <td><italic>P</italic> value</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Age</td>
              <td>1.05 (1.05-1.06)</td>
              <td>1.05 (1.04-1.06)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Sex</td>
              <td>0.82 (0.70-0.97)</td>
              <td>0.62 (0.50-0.76)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Waistline</td>
              <td>1.04 (1.03-1.05)</td>
              <td>0.99 (0.97-1.01)</td>
              <td>.27</td>
            </tr>
            <tr valign="top">
              <td>Sagittal abdominal diameter</td>
              <td>1.20 (1.18-1.22)</td>
              <td>1.16 (1.09-1.24)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Relative leg length</td>
              <td>0.70 (0.66-0.74)</td>
              <td>0.85 (0.79-0.91)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>60 second pulse</td>
              <td>1.02 (1.01-1.02)</td>
              <td>1.02 (1.01-1.03)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Smoking</td>
              <td>0.74 (0.63-0.88)</td>
              <td>1.13 (0.92-1.38)</td>
              <td>.26</td>
            </tr>
            <tr valign="top">
              <td>Alcohol</td>
              <td>1.43 (1.19-1.72)</td>
              <td>1.31 (1.04-1.66)</td>
              <td>.02</td>
            </tr>
            <tr valign="top">
              <td>Hypertension</td>
              <td>3.26 (2.72-3.90)</td>
              <td>1.02 (0.82-1.27)</td>
              <td>.86</td>
            </tr>
            <tr valign="top">
              <td>Family history</td>
              <td>0.28 (0.24-0.34)</td>
              <td>0.32 (0.26-0.39)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>General health condition</td>
              <td>2.05 (1.88-2.24)</td>
              <td>1.59 (1.44-1.76)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Control or loss of weight</td>
              <td>0.42 (0.35-0.51)</td>
              <td>0.55 (0.44-0.69)</td>
              <td>&#60;.001</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>Crude: 1-way logistic regression.</p>
          </fn>
          <fn id="table1fn2">
            <p><sup>b</sup>OR: odds ratio.</p>
          </fn>
          <fn id="table1fn3">
            <p><sup>c</sup>Adjusted: multiple logistic regression.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p>The risk of having type 2 diabetes increases with increased age (95% CI 1.04-1.06, <italic>P</italic>&#60;.001), sagittal abdominal diameter (95% CI 1.09-1.24, <italic>P</italic>&#60;.001), pulse (95% CI 1.01-1.03, <italic>P</italic>&#60;.001), and alcohol use (95% CI 1.04-1.66, <italic>P</italic>=.02) as well as poorer general health condition (95% CI 1.44-1.76, <italic>P</italic>&#60;.001). In contrast, female sex, longer relative leg length, lack of type 2 diabetes family history, and control of weight are protective factors against type 2 diabetes (95% CI 0.50-0.76, 0.79-0.91, 0.26-0.39, and 0.44-0.69, respectively; <italic>P</italic>&#60;.001 in all cases). We built three different models using linear discriminant analysis, random forest, and support vector machine methods to determine type 2 diabetes risk using the training set with these noninvasive tests. Afterward, the test set and external validation set were used to measure the predictive ability of the models.</p>
      <p>We generated six models with three different machine learning methods as well as corresponding ensemble methods in the training set. The 5-fold cross-validation results in <xref ref-type="table" rid="table2">Table 2</xref> show that the linear discriminant analysis method yielded the best AUC compared with the random forest and support vector machine methods not only with the simple methods but also with the easy ensemble methods. However, the ensemble method improvements in the different methods are in the order of support vector machine &#62; random forest &#62; linear discriminant analysis. In 5-fold cross-validation, the simple linear discriminant analysis method showed 0.844 AUC, 74.1% sensitivity, 79.5% specificity, 78.7% accuracy, and 40.2% PPV; the ensemble linear discriminant analysis method showed 0.845 AUC, 79.7% sensitivity, 73.5% specificity, 74.5% accuracy, and 35.8% PPV. The simple random forest method showed 0.823 AUC, 86.2% sensitivity, 61.2% specificity, 65.1% accuracy, and 29.2% PPV; its ensemble method showed 0.834 AUC, 78.4% sensitivity, 73.2% specificity, 74.0% accuracy, and 35.2% PPV. The simple support vector machine method showed 0.808 AUC, 69.2% sensitivity, 81.1% specificity, 79.2% accuracy, and 40.5% PPV; the ensemble support vector machine method showed 0.842 AUC, 78.7% sensitivity, 74.8% specificity, 75.4% accuracy, and 36.7% PPV. The line graph in <xref rid="figure2" ref-type="fig">Figure 2</xref> shows that the AUC improved with accumulation of the models, and the values remained stable after the composition of approximately 10 models.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Average results (SD) of the 5-fold cross-validation of the models in the training set.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="270"/>
          <col width="160"/>
          <col width="150"/>
          <col width="120"/>
          <col width="140"/>
          <col width="130"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Method</td>
              <td>AUC<sup>a</sup></td>
              <td>Sensitivity</td>
              <td>Specificity</td>
              <td>Accuracy</td>
              <td>PPV<sup>b</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="7">
                <bold>Simple methods</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Linear discriminant analysis</td>
              <td>0.844 (0.016)</td>
              <td>0.741 (0.035)</td>
              <td>0.795 (0.015)</td>
              <td>0.787 (0.013)</td>
              <td>0.402 (0.020)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Random forest</td>
              <td>0.823 (0.016)</td>
              <td>0.862 (0.029)</td>
              <td>0.612 (0.019)</td>
              <td>0.651 (0.015)</td>
              <td>0.292 (0.011)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Support vector machine</td>
              <td>0.808 (0.015)</td>
              <td>0.692 (0.035)</td>
              <td>0.811 (0.017)</td>
              <td>0.792 (0.014)</td>
              <td>0.405 (0.023)</td>
            </tr>
            <tr valign="top">
              <td colspan="7">
                <bold>Ensemble methods</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>EE<sup>c</sup> linear discriminant analysis</td>
              <td>0.845 (0.016)</td>
              <td>0.797 (0.032)</td>
              <td>0.735 (0.016)</td>
              <td>0.745 (0.014)</td>
              <td>0.358 (0.017)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>EE random forest</td>
              <td>0.834 (0.016)</td>
              <td>0.784 (0.033)</td>
              <td>0.732 (0.016)</td>
              <td>0.740 (0.014)</td>
              <td>0.352 (0.016)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>EE support vector machine</td>
              <td>0.842 (0.016)</td>
              <td>0.787 (0.034)</td>
              <td>0.748 (0.017)</td>
              <td>0.754 (0.014)</td>
              <td>0.367 (0.018)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>AUC: area under the curve.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>PPV: positive predictive value.</p>
          </fn>
          <fn id="table2fn3">
            <p><sup>c</sup>EE: easy ensemble method.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <fig id="figure2" position="float">
        <label>Figure 2</label>
        <caption>
          <p>Comparison of the top 50 models with the easy ensemble method and the simple method with different machine learning methods and 5-fold cross-validation in the training set. AUC: area under the curve. LDA: linear discriminant analysis. RF: random forest. SVM: support vector machine.</p>
        </caption>
        <graphic xlink:href="medinform_v8i6e15431_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p/>
      <p>The 5-fold cross-validation indicated that the different models show reliable capability. Similarly, the AUCs of the developed models range from 0.810 to 0.850 in the test and validation datasets, indicating their stability and extensibility for predicting the risk of new patients with type 2 diabetes. Furthermore, when considering the performance of the easy ensemble methods in the test set (<xref ref-type="table" rid="table3">Table 3</xref>), these methods appeared to predict type 2 diabetes more efficiently than the other methods. For the random forest and support vector machine methods, the easy ensemble methods provided significantly better AUC values than the respective simple methods (absolute AUC improvement 0.014, z=3.062, <italic>P</italic>=.002 and 0.07, z=5.010, <italic>P</italic>&#60;.001, respectively), as determined by the DeLong test for two correlated ROC curves (2-sided). However, the improvement for linear discriminant analysis was not significant (z=1.252, <italic>P</italic>=.21) according to the DeLong test. In the validation set (<xref ref-type="table" rid="table3">Table 3</xref>), we found a similar pattern. The easy ensemble methods improved the overall predictive performance by 0.004 (z=2.734, <italic>P</italic>=.006) for linear discriminant analysis, 0.008 (z=2.991, <italic>P</italic>=.002) for random forest, and 0.037 (z=5.908, <italic>P</italic>&#60;.001) for support vector machine.</p>
      <p>The results indicate that the ensemble methods can be used to screen large populations for type 2 diabetes based on their significantly improved performance in the tests for the random forest and support vector machine methods and in the external validation set for the linear discriminant analysis, random forest, and support vector machine methods. For better and easier application of type 2 diabetes screening, a screening website based on the ensemble method has been established [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Performance of the simple and ensemble methods in the test and validation sets.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="30"/>
          <col width="310"/>
          <col width="90"/>
          <col width="160"/>
          <col width="150"/>
          <col width="140"/>
          <col width="90"/>
          <thead>
            <tr valign="top">
              <td colspan="3">Method</td>
              <td>AUC<sup>a</sup></td>
              <td>Sensitivity</td>
              <td>Specificity</td>
              <td>Accuracy</td>
              <td>PPV<sup>b</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="8">
                <bold>Test set</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="7">
                <bold>Simple methods</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Linear discriminant analysis</td>
              <td>0.864</td>
              <td>0.697</td>
              <td>0.829</td>
              <td>0.808</td>
              <td>0.429</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Random forest</td>
              <td>0.836</td>
              <td>0.830</td>
              <td>0.648</td>
              <td>0.676</td>
              <td>0.303</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Support vector machine</td>
              <td>0.796</td>
              <td>0.630</td>
              <td>0.864</td>
              <td>0.827</td>
              <td>0.460</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="7">
                <bold>Ensemble methods</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>EE<sup>c</sup> linear discriminant analysis</td>
              <td>0.867</td>
              <td>0.758</td>
              <td>0.777</td>
              <td>0.774</td>
              <td>0.385</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>EE random forest</td>
              <td>0.850</td>
              <td>0.776</td>
              <td>0.770</td>
              <td>0.771</td>
              <td>0.383</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>EE support vector machine</td>
              <td>0.861</td>
              <td>0.752</td>
              <td>0.783</td>
              <td>0.778</td>
              <td>0.390</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Validation set</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="7">
                <bold>Simple methods</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Linear discriminant analysis</td>
              <td>0.846</td>
              <td>0.759</td>
              <td>0.762</td>
              <td>0.761</td>
              <td>0.418</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Random forest</td>
              <td>0.828</td>
              <td>0.888</td>
              <td>0.594</td>
              <td>0.648</td>
              <td>0.331</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Support vector machine</td>
              <td>0.811</td>
              <td>0.720</td>
              <td>0.789</td>
              <td>0.776</td>
              <td>0.435</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="7">
                <bold>Ensemble methods</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>EE<sup>c</sup> linear discriminant analysis</td>
              <td>0.849</td>
              <td>0.819</td>
              <td>0.709</td>
              <td>0.730</td>
              <td>0.389</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>EE random forest</td>
              <td>0.836</td>
              <td>0.813</td>
              <td>0.713</td>
              <td>0.731</td>
              <td>0.390</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>EE support vector machine</td>
              <td>0.848</td>
              <td>0.824</td>
              <td>0.714</td>
              <td>0.734</td>
              <td>0.394</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>AUC: area under the curve.</p>
          </fn>
          <fn id="table3fn2">
            <p><sup>b</sup>PPV: positive predictive value.</p>
          </fn>
          <fn id="table3fn3">
            <p><sup>c</sup>EE: easy ensemble method.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>The results of one analysis predicted that the world ranking of the number of years of life lost due to diabetes will increase from 15th to 7th [<xref ref-type="bibr" rid="ref40">40</xref>] by 2040. The fact that type 2 diabetes damages health conditions deserves special attention. In this article, we generated type 2 diabetes screening models and applied them to a large population. Although some researchers [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref20">20</xref>] have studied machine learning models for screening and predicting type 2 diabetes, most of their studies focused on improving performance by selecting many features, such as blood test results, instead of considering the practical significance of cost and flexibility. In contrast, we used a noninvasive test covering demographic factors, body measurements, and questionnaire variables to build our models; this addresses the shortcomings of using invasive tests. Jai Won Chung et al [<xref ref-type="bibr" rid="ref21">21</xref>] also adopted noninvasive features to predict prediabetes, including age, gender, family history of diabetes, hypertension, alcohol intake, BMI, smoking status, waist circumference, and physical activity; they obtained a best AUC of 0.76 in the external test data. However, the attributes they chose were relatively traditional compared with those chosen in this study; in addition, the similarities between prediabetes and healthy cases can result in lower AUC values. The validation of our models indicates that body measurements and questionnaire questions can be used to predict whether a person has type 2 diabetes. In the case of further effects resulting from high blood sugar conditions, the models can be used to screen the identified people.</p>
      </sec>
      <sec>
        <title>Principal Results</title>
        <p>In the feature selection process in this study, traditional analyses such as the <italic>t</italic> test, chi-square test, and binary logistic regression were used. We extracted unusual attributes related to type 2 diabetes, such as sagittal abdominal diameter, relative leg length, and heart rate, which were proven to be significant in similar studies [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], in addition to some common risk factors, such as age, sex, alcohol use, and family history [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. Among these features, relative leg length was an interesting clue to type 2 diabetes that has not previously been used in type 2 diabetes prediction; this feature was selected by <italic>t</italic> test and forward conditional logistic regression. Epidemiological studies from various settings indicate that humans with shorter legs relative to their stature have higher risk for type 2 diabetes [<xref ref-type="bibr" rid="ref28">28</xref>]. Relative leg length can be easily determined and has a strong correlation with type 2 diabetes; therefore, it may be a useful new attribute in model building or epidemiology research. With increasing adoption of this feature, our model will be more accurate and dependable.</p>
        <p>Reliable type 2 diabetes screening models based on noninvasive tests and machine learning algorithms were established and validated in this study. All the easy ensemble methods yielded higher predictive performance (AUC≥0.85 and AUC≥0.83, respectively) in the test set and validation set than the simple methods, indicating the efficiency of the ensemble methods. Screening models based on population are always an unbalanced problem, with more negative samples and fewer positive samples in the whole dataset. In other words, the learning ability of the models is not satisfied by the positive samples. We randomly matched a negative sample for every positive sample and generated 100 base models. This type of repeated learning from the positive samples may improve the results of the models. In addition to AUC, the application of the ensemble can increase the steadiness of the performance; this was exhibited by other measurements, such as sensitivity, specificity, accuracy, and PPV. Compared with different machine learning methods, the ensemble method improvement is limited; this suggests that the dataset and features are more essential. In recent research, the results show that individuals with screen-detected type 2 diabetes were diagnosed earlier and had better outcomes than those who were clinically detected with regard to all-cause mortality, cerebrovascular disease, renal disease, and retinopathy [<xref ref-type="bibr" rid="ref45">45</xref>]. In addition to earlier ordinal treatment, Ej et al [<xref ref-type="bibr" rid="ref46">46</xref>] introduced a method to recover the function of islets by diet control. Regardless of treatment, quality of life improvement and decreased disease burden are important.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are several limitations of our research project. The World Health Organization definition of diabetes is inferior to proper diagnosis by an experienced physician; also, we cannot clearly separate type 1 diabetes from type 2 diabetes, which would cause bias because of their different epidemiological attributes. After removing the baseline missing values and executing the inclusion and exclusion criteria, there were 10,710 samples in the entire database. Additionally, 2653 missing and biased values were removed. The proportion of patients with diabetes to patients without diabetes is approximately 1:5; therefore, the increased amount of abandoned diabetes data may reduce the predictive ability of the model. Reproducibility remains doubtful given the variable demographics of the different datasets. Only a study using noninvasive features to screen for diabetes can minimize the impact of demographic changes such as those considered in large population health studies and nutrition surveys. The best PPV was only 0.435 in the validation set; this indicates that only approximately 40% of the people classified as positive by these models were actually patients with type 2 diabetes. A higher false-positive value increases the financial expenses of the health care system in the beginning; however, this type of screening program can improve the overall health of the population, and earlier diagnosis can decrease the disease burden, ultimately decreasing health care expenses related to diabetes. On one hand, although the easy ensemble method [<xref ref-type="bibr" rid="ref37">37</xref>] applied here addresses the unbalanced problem in one sense, more positive observations may yield better performance; on the other hand, the building of type 2 diabetes screening models is always an imbalanced problem when screening patients with type 2 diabetes from a large population. Therefore, we cannot solve the unbalanced problem completely. After considering all the other possible biases influencing the performance of the models, the key point is to further explore and optimize the unbalanced problem.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Accurate models with low-cost variables based on NHANES data for screening type 2 diabetes were established; the models performed better with the application of ensemble methods. The use of NHANES data by the models ensured a sufficient sample size, and the models can be a tool to determine the health conditions of people who were not included in the survey. Compared with prior literature, this study has certain advantages, such as noninvasive features and reliable model performance. However, we still obtained low PPV results for the unbalanced problem and could not completely solve the missing value problem. Furthermore, we can not only optimize the method by incorporating more quality data from medical schools but can also combine our study with a cohort study to achieve primary prevention.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">caret</term>
          <def>
            <p>Classification And REgression Training</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NHANES</term>
          <def>
            <p>National Health and Nutrition Examination Survey</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">OR</term>
          <def>
            <p>odds ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the Important Scientific and Technical Achievements Transformation Project under Grant No. Z17-5-078, the Agricultural Science and Technology Innovation Team of the Science and Technology Department of Liaoning Province, the Large-scale Instrument Equipment Sharing Service Platform Capacity Building Fund under Grant No. kjhx2017028, the High-level Innovation Team Foreign Training Project under Grant No. 2018LNGXGJWPY-YB006, the Shenyang Science and Technology Plan Project under Grant Nos. F16-205-1-51, 17-65-7-00 and 17-231-1-04, and the Excellent Chinese and Foreign Youth Exchange Plant Project from the China Association for Science and Technology under Grant No. 2018CASTQNJL50. This project was supported by the Engineering Laboratory for Molecular Simulation and Designing of Drug Molecules of Liaoning and the Research Center for Computer Simulating and Information Processing of Bio-macromolecules of Liaoning Province.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>TZY and LZ are co–first authors and contributed equally to this work. TZY prepared the first draft of the paper and performed the primary computations for the analysis. LZ developed the main R program. LWY built the prediction website. FHW and SML performed the literature search and plotted the figures. LZ, HSL, and the other authors provided overall guidance, reviewed the results, or reviewed and contributed to this manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>World Health Organization</collab>
          </person-group>
          <source>Classification of diabetes mellitus</source>
          <year>2019</year>
          <month>4</month>
          <day>21</day>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carstensen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Jørgensen</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Friis</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The Epidemiology of Diabetes and Cancer</article-title>
          <source>Curr Diab Rep</source>
          <year>2014</year>
          <month>8</month>
          <day>26</day>
          <volume>14</volume>
          <issue>10</issue>
          <pub-id pub-id-type="doi">10.1007/s11892-014-0535-8</pub-id>
          <pub-id pub-id-type="medline">25156543</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Diabetes and the risk of multi-system aging phenotypes: a systematic review and meta-analysis</article-title>
          <source>PLoS One</source>
          <year>2009</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>e4144</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0004144"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0004144</pub-id>
          <pub-id pub-id-type="medline">19127292</pub-id>
          <pub-id pub-id-type="pmcid">PMC2607544</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Backholer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gearon</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Harding</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Freak-Poli</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stevenson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Peeters</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Diabetes and risk of physical disability in adults: a systematic review and meta-analysis</article-title>
          <source>Lancet Diabetes Endocrinol</source>
          <year>2013</year>
          <month>10</month>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>106</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S2213-8587(13)70046-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2213-8587(13)70046-9</pub-id>
          <pub-id pub-id-type="medline">24622316</pub-id>
          <pub-id pub-id-type="pii">S2213-8587(13)70046-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jeon</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>MB</given-names>
            </name>
          </person-group>
          <article-title>Diabetes Mellitus Increases the Risk of Active Tuberculosis: A Systematic Review of 13 Observational Studies</article-title>
          <source>PLoS Med</source>
          <year>2008</year>
          <month>7</month>
          <day>15</day>
          <volume>5</volume>
          <issue>7</issue>
          <fpage>e152</fpage>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.0050152</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Riza</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ugarte-Gil</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alisjahbana</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>van de Vijver</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Panduru</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Ruslami</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Aarnoutse</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Critchley</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>van Crevel</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Clinical management of concurrent diabetes and tuberculosis and the implications for patient services</article-title>
          <source>Lancet Diabetes Endocrinol</source>
          <year>2014</year>
          <month>09</month>
          <volume>2</volume>
          <issue>9</issue>
          <fpage>740</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25194887"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2213-8587(14)70110-X</pub-id>
          <pub-id pub-id-type="medline">25194887</pub-id>
          <pub-id pub-id-type="pii">S2213-8587(14)70110-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC4852378</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lloyd</surname>
              <given-names>CE</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of depression and diabetes: a systematic review</article-title>
          <source>J Affect Disord</source>
          <year>2012</year>
          <month>10</month>
          <volume>142 Suppl</volume>
          <fpage>S8</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1016/S0165-0327(12)70004-6</pub-id>
          <pub-id pub-id-type="medline">23062861</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(12)70004-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jacobs</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hoyer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brinks</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Icks</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kuß</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Rathmann</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Healthcare costs of Type 2 diabetes in Germany</article-title>
          <source>Diabet Med</source>
          <year>2017</year>
          <month>06</month>
          <volume>34</volume>
          <issue>6</issue>
          <fpage>855</fpage>
          <lpage>861</lpage>
          <pub-id pub-id-type="doi">10.1111/dme.13336</pub-id>
          <pub-id pub-id-type="medline">28199029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>World Health Organization</collab>
          </person-group>
          <source>Global Report On Diabetes</source>
          <year>2016</year>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rawshani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rawshani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Franzén</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Eliasson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Svensson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Miftaraj</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McGuire</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Sattar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rosengren</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gudbjörnsdottir</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mortality and Cardiovascular Disease in Type 1 and Type 2 Diabetes</article-title>
          <source>N Engl J Med</source>
          <year>2017</year>
          <month>04</month>
          <day>13</day>
          <volume>376</volume>
          <issue>15</issue>
          <fpage>1407</fpage>
          <lpage>1418</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1608664</pub-id>
          <pub-id pub-id-type="medline">28402770</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>NCD Risk Factor Collaboration (NCD-RisC)</collab>
          </person-group>
          <article-title>Worldwide trends in diabetes since 1980: a pooled analysis of 751 population-based studies with 4.4 million participants</article-title>
          <source>Lancet</source>
          <year>2016</year>
          <month>04</month>
          <day>09</day>
          <volume>387</volume>
          <issue>10027</issue>
          <fpage>1513</fpage>
          <lpage>1530</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(16)00618-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(16)00618-8</pub-id>
          <pub-id pub-id-type="medline">27061677</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(16)00618-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5081106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>International Diabetes Federation</collab>
          </person-group>
          <source>IDF Diabetes Atlas, 8th edition</source>
          <year>2017</year>
          <publisher-loc>Brussels</publisher-loc>
          <publisher-name>International Diabetes Federation</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rawshani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rawshani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Franzén</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sattar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Eliasson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Svensson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zethelius</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Miftaraj</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McGuire</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Rosengren</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gudbjörnsdottir</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Risk Factors, Mortality, and Cardiovascular Outcomes in Patients with Type 2 Diabetes</article-title>
          <source>N Engl J Med</source>
          <year>2018</year>
          <month>08</month>
          <day>16</day>
          <volume>379</volume>
          <issue>7</issue>
          <fpage>633</fpage>
          <lpage>644</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1800256</pub-id>
          <pub-id pub-id-type="medline">30110583</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Simmons</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Echouffo-Tcheugui</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Sharp</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sargeant</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Prevost</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Kinmonth</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Wareham</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Screening for type 2 diabetes and population mortality over 10 years (ADDITION-Cambridge): a cluster-randomised controlled trial</article-title>
          <source>Lancet</source>
          <year>2012</year>
          <month>11</month>
          <day>17</day>
          <volume>380</volume>
          <issue>9855</issue>
          <fpage>1741</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(12)61422-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(12)61422-6</pub-id>
          <pub-id pub-id-type="medline">23040422</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(12)61422-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC3607818</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Simmons</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jakes</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Yuyun</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Niggebrugge</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Hennings</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>DRR</given-names>
            </name>
            <name name-style="western">
              <surname>Wareham</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Effect of population screening for type 2 diabetes on mortality: long-term follow-up of the Ely cohort</article-title>
          <source>Diabetologia</source>
          <year>2011</year>
          <month>02</month>
          <volume>54</volume>
          <issue>2</issue>
          <fpage>312</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1007/s00125-010-1949-8</pub-id>
          <pub-id pub-id-type="medline">20978739</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Rule extraction from support vector machines using ensemble learning approach: an application for diagnosis of diabetes</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2015</year>
          <month>03</month>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>728</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2014.2325615</pub-id>
          <pub-id pub-id-type="medline">24860043</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maniruzzaman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Menhazul Abedin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shaykhul Islam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Suri</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>El-Baz</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Suri</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Comparative approaches for classification of diabetes mellitus data: Machine learning paradigm</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2017</year>
          <month>12</month>
          <volume>152</volume>
          <fpage>23</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2017.09.004</pub-id>
          <pub-id pub-id-type="medline">29054258</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(17)30282-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maniruzzaman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Al-MehediHasan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Suri</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Abedin</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>El-Baz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Suri</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Accurate Diabetes Risk Stratification Using Machine Learning: Role of Missing Value and Outliers</article-title>
          <source>J Med Syst</source>
          <year>2018</year>
          <month>04</month>
          <day>10</day>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>92</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29637403"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10916-018-0940-7</pub-id>
          <pub-id pub-id-type="medline">29637403</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10916-018-0940-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5893681</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A machine learning-based framework to identify type 2 diabetes through electronic health records</article-title>
          <source>Int J Med Inform</source>
          <year>2017</year>
          <month>01</month>
          <volume>97</volume>
          <fpage>120</fpage>
          <lpage>127</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27919371"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2016.09.014</pub-id>
          <pub-id pub-id-type="medline">27919371</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(16)30215-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5144921</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Qu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ju</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Predicting Diabetes Mellitus With Machine Learning Techniques</article-title>
          <source>Front Genet</source>
          <year>2018</year>
          <volume>9</volume>
          <fpage>515</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.3389/fgene.2018.00515"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fgene.2018.00515</pub-id>
          <pub-id pub-id-type="medline">30459809</pub-id>
          <pub-id pub-id-type="pmcid">PMC6232260</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>DW</given-names>
            </name>
          </person-group>
          <article-title>Screening for pre-diabetes using support vector machine model</article-title>
          <source>Conf Proc IEEE Eng Med Biol Soc</source>
          <year>2014</year>
          <volume>2014</volume>
          <fpage>2472</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1109/EMBC.2014.6944123</pub-id>
          <pub-id pub-id-type="medline">25570491</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>CRAN - R Project</source>
          <year>2019</year>
          <month>4</month>
          <day>18</day>
          <access-date>2019-04-24</access-date>
          <comment>caret: Classification and Regression Training <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/caret/">https://cran.r-project.org/web/packages/caret/</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cutler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wiener</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liaw</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>randomForest: Breiman and Cutler's Random Forests for Classification and Regression</source>
          <year>2018</year>
          <month>3</month>
          <day>25</day>
          <access-date>2019-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/randomForest/">https://cran.r-project.org/web/packages/randomForest/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Turck</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hainard</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tiberti</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lisacek</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sanchez</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>pROC: Display and Analyze ROC Curves</source>
          <year>2019</year>
          <month>3</month>
          <day>12</day>
          <access-date>2019-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/pROC/index.html">https://cran.r-project.org/web/packages/pROC/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dimitriadou</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hornik</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Weingessel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Leisch</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <source>e1071: Misc Functions of the Department of Statistics, Probability Theory Group (Formerly: E1071), TU Wien</source>
          <year>2019</year>
          <month>3</month>
          <day>19</day>
          <access-date>2019-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/e1071/index.html">https://cran.r-project.org/web/packages/e1071/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dal Pozzolo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Caelen</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Bontempi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>unbalanced: Racing for Unbalanced Methods Selection</source>
          <year>2015</year>
          <month>6</month>
          <day>26</day>
          <access-date>2019-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/unbalanced/index.html">https://cran.r-project.org/web/packages/unbalanced/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Whelton</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Aronow</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Casey</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dennison Himmelfarb</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>DePalma</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Gidding</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jamerson</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>MacLaughlin</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Muntner</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ovbiagele</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Spencer</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Stafford</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Taler</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>2017 ACC/AHA/AAPA/ABC/ACPM/AGS/APhA/ASH/ASPC/NMA/PCNA Guideline for the Prevention, Detection, Evaluation, and Management of High Blood Pressure in Adults: Executive Summary: A Report of the American College of Cardiology/American Heart Association Task Force on Clinical Practice Guidelines</article-title>
          <source>J Am Coll Cardiol</source>
          <year>2018</year>
          <month>05</month>
          <day>15</day>
          <volume>71</volume>
          <issue>19</issue>
          <fpage>2199</fpage>
          <lpage>2269</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0735-1097(17)41518-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jacc.2017.11.005</pub-id>
          <pub-id pub-id-type="medline">29146533</pub-id>
          <pub-id pub-id-type="pii">S0735-1097(17)41518-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mueller</surname>
              <given-names>NT</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Leg length and type 2 diabetes: what's the link?</article-title>
          <source>Curr Opin Clin Nutr Metab Care</source>
          <year>2015</year>
          <month>09</month>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>452</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1097/MCO.0000000000000211</pub-id>
          <pub-id pub-id-type="medline">26167802</pub-id>
          <pub-id pub-id-type="pmcid">PMC4672946</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rapp</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schroeder</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Klenk</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ulmer</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Concin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Diem</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Oberaigner</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Weiland</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Fasting blood glucose and cancer risk in a cohort of more than 140,000 adults in Austria</article-title>
          <source>Diabetologia</source>
          <year>2006</year>
          <month>05</month>
          <volume>49</volume>
          <issue>5</issue>
          <fpage>945</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1007/s00125-006-0207-6</pub-id>
          <pub-id pub-id-type="medline">16557372</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Orsi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Grancini</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Menini</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aghemo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pugliese</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Hepatogenous diabetes: Is it time to separate it from type 2 diabetes?</article-title>
          <source>Liver Int</source>
          <year>2017</year>
          <month>07</month>
          <volume>37</volume>
          <issue>7</issue>
          <fpage>950</fpage>
          <lpage>962</lpage>
          <pub-id pub-id-type="doi">10.1111/liv.13337</pub-id>
          <pub-id pub-id-type="medline">27943508</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Duncan</surname>
              <given-names>DB</given-names>
            </name>
          </person-group>
          <article-title>Estimation of the probability of an event as a function of several independent variables</article-title>
          <source>Biometrika</source>
          <year>1967</year>
          <month>06</month>
          <volume>54</volume>
          <issue>1</issue>
          <fpage>167</fpage>
          <lpage>79</lpage>
          <pub-id pub-id-type="medline">6049533</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>The Regression Analysis of Binary Sequences</article-title>
          <source>J R Stat Soc B</source>
          <year>2018</year>
          <month>12</month>
          <day>05</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>238</fpage>
          <lpage>238</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1959.tb00334.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The Use of Multiple Measurements in Taxonomic Problems</article-title>
          <source>Ann Hum Genet</source>
          <year>1936</year>
          <volume>7</volume>
          <fpage>179</fpage>
          <lpage>188</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1469-1809.1936.tb02137.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Olshen</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>Classification and Regression Trees</article-title>
          <source>Biometrics</source>
          <year>1984</year>
          <month>09</month>
          <volume>40</volume>
          <issue>3</issue>
          <fpage>874</fpage>
          <pub-id pub-id-type="doi">10.2307/2530946</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>TK</given-names>
            </name>
          </person-group>
          <article-title>Random decision forests</article-title>
          <year>1995</year>
          <conf-name>Proceedings of 3rd International Conference on Document Analysis and Recognition</conf-name>
          <conf-date>1995</conf-date>
          <conf-loc>Montreal, Quebec, Canada</conf-loc>
          <fpage>278</fpage>
          <lpage>282</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Mach Learn</source>
          <year>1995</year>
          <month>9</month>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>273</fpage>
          <lpage>297</lpage>
          <pub-id pub-id-type="doi">10.1007/BF00994018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Exploratory undersampling for class-imbalance learning</article-title>
          <source>IEEE Trans Syst Man Cybern B Cybern</source>
          <year>2009</year>
          <month>04</month>
          <volume>39</volume>
          <issue>2</issue>
          <fpage>539</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1109/TSMCB.2008.2007853</pub-id>
          <pub-id pub-id-type="medline">19095540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Youden</surname>
              <given-names>WJ</given-names>
            </name>
          </person-group>
          <article-title>Index for rating diagnostic tests</article-title>
          <source>Cancer</source>
          <year>1950</year>
          <month>01</month>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>32</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1002/1097-0142(1950)3:1&lt;32::aid-cncr2820030106&gt;3.0.co;2-3</pub-id>
          <pub-id pub-id-type="medline">15405679</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <source>Type 2 Diabetes Prediction Website</source>
          <access-date>2020-05-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://112.126.70.33/diabetes">http://112.126.70.33/diabetes</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Foreman</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marquez</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dolgert</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fukutaki</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Fullman</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>McGaughey</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pletcher</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Heuton</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Holmberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Reidy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cercy</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chapin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Douwes-Schultz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Frank</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Goettsch</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PY</given-names>
            </name>
            <name name-style="western">
              <surname>Nandakumar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Reitsma</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Reuter</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Sadat</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sorensen</surname>
              <given-names>RJD</given-names>
            </name>
            <name name-style="western">
              <surname>Srinivasan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Updike</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>York</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Lozano</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Mokdad</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Vollset</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>CJL</given-names>
            </name>
          </person-group>
          <article-title>Forecasting life expectancy, years of life lost, and all-cause and cause-specific mortality for 250 causes of death: reference and alternative scenarios for 2016-40 for 195 countries and territories</article-title>
          <source>Lancet</source>
          <year>2018</year>
          <month>12</month>
          <day>10</day>
          <volume>392</volume>
          <issue>10159</issue>
          <fpage>2052</fpage>
          <lpage>2090</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(18)31694-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(18)31694-5</pub-id>
          <pub-id pub-id-type="medline">30340847</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(18)31694-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6227505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Firouzi</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>LeCheminant</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>BW</given-names>
            </name>
          </person-group>
          <article-title>Sagittal Abdominal Diameter, Waist Circumference, and BMI as Predictors of Multiple Measures of Glucose Metabolism: An NHANES Investigation of US Adults</article-title>
          <source>J Diabetes Res</source>
          <year>2018</year>
          <month>06</month>
          <volume>2018</volume>
          <fpage>3604108</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.1155/2018/3604108"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2018/3604108</pub-id>
          <pub-id pub-id-type="medline">30018985</pub-id>
          <pub-id pub-id-type="pmcid">PMC6029495</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aune</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ó Hartaigh</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Vatten</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>Resting heart rate and the risk of type 2 diabetes: A systematic review and dose–response meta-analysis of cohort studies</article-title>
          <source>Nutr Metab Cardiovasc Dis</source>
          <year>2015</year>
          <month>06</month>
          <volume>25</volume>
          <issue>6</issue>
          <fpage>526</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1016/j.numecd.2015.02.008</pub-id>
          <pub-id pub-id-type="medline">25891962</pub-id>
          <pub-id pub-id-type="pii">S0939-4753(15)00051-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Association between alcohol consumption and the risk of incident type 2 diabetes: a systematic review and dose-response meta-analysis</article-title>
          <source>Am J Clin Nutr</source>
          <year>2016</year>
          <month>03</month>
          <volume>103</volume>
          <issue>3</issue>
          <fpage>818</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.3945/ajcn.115.114389</pub-id>
          <pub-id pub-id-type="medline">26843157</pub-id>
          <pub-id pub-id-type="pii">ajcn.115.114389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Khoury</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Family history and prevalence of diabetes in the U.S. population: the 6-year results from the National Health and Nutrition Examination Survey (1999-2004)</article-title>
          <source>Diabetes Care</source>
          <year>2007</year>
          <month>10</month>
          <volume>30</volume>
          <issue>10</issue>
          <fpage>2517</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.2337/dc07-0720</pub-id>
          <pub-id pub-id-type="medline">17634276</pub-id>
          <pub-id pub-id-type="pii">dc07-0720</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fhärm</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Norberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wennberg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weinehall</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rolandsson</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Screening for type 2 diabetes: do screen-detected cases fare better?</article-title>
          <source>Diabetologia</source>
          <year>2017</year>
          <month>11</month>
          <volume>60</volume>
          <issue>11</issue>
          <fpage>2200</fpage>
          <lpage>2209</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28831538"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00125-017-4402-4</pub-id>
          <pub-id pub-id-type="medline">28831538</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00125-017-4402-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC6086324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lean</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Leslie</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Brosnahan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Thom</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>McCombie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhyzhneuskaya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Mrabeh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hollingsworth</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Rodrigues</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Rehackova</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Adamson</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sniehotta</surname>
              <given-names>FF</given-names>
            </name>
            <name name-style="western">
              <surname>Mathers</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>McIlvenna</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Stefanetti</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Trenell</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Welsh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kean</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>McConnachie</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sattar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Primary care-led weight management for remission of type 2 diabetes (DiRECT): an open-label, cluster-randomised trial</article-title>
          <source>Lancet</source>
          <year>2018</year>
          <month>02</month>
          <day>10</day>
          <volume>391</volume>
          <issue>10120</issue>
          <fpage>541</fpage>
          <lpage>551</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(17)33102-1</pub-id>
          <pub-id pub-id-type="medline">29221645</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(17)33102-1</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
