<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i7e18963</article-id>
      <article-id pub-id-type="pmid">32618575</article-id>
      <article-id pub-id-type="doi">10.2196/18963</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Predicting Current Glycated Hemoglobin Levels in Adults From Electronic Health Records: Validation of Multiple Logistic Regression Algorithm</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Spiliopoulou</surname>
            <given-names>Myra</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Alhassan</surname>
            <given-names>Zakhriya</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>Durham University</institution>
            <addr-line>Mountjoy Centre</addr-line>
            <addr-line>Stockton Road</addr-line>
            <addr-line>Durham, DH1 3LE</addr-line>
            <country>United Kingdom</country>
            <phone>44 191 3341724</phone>
            <email>zakhriya.n.alhassan@durham.ac.uk</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6644-1656</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Budgen</surname>
            <given-names>David</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7143-0241</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Alshammari</surname>
            <given-names>Riyad</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0529-2458</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Al Moubayed</surname>
            <given-names>Noura</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8942-355X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>Durham University</institution>
        <addr-line>Durham</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Computer Science Department</institution>
        <institution>College of Computer Science and Engineering</institution>
        <institution>University of Jeddah</institution>
        <addr-line>Jeddah</addr-line>
        <country>Saudi Arabia</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>College of Public Health and Health Informatics</institution>
        <institution>Health Informatics Department</institution>
        <institution>King Saud bin Abdulaziz University for Health Sciences</institution>
        <addr-line>Riyadh</addr-line>
        <country>Saudi Arabia</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>King Abdullah International Medical Research Center</institution>
        <institution>Ministry of the National Guard - Health Affairs</institution>
        <addr-line>Riyadh</addr-line>
        <country>Saudi Arabia</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Zakhriya Alhassan <email>zakhriya.n.alhassan@durham.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>3</day>
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>7</issue>
      <elocation-id>e18963</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>26</day>
          <month>4</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>31</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>4</day>
          <month>6</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Zakhriya Alhassan, David Budgen, Riyad Alshammari, Noura Al Moubayed. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 03.07.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2020/7/e18963" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Electronic health record (EHR) systems generate large datasets that can significantly enrich the development of medical predictive models. Several attempts have been made to investigate the effect of glycated hemoglobin (HbA<sub>1c</sub>) elevation on the prediction of diabetes onset. However, there is still a need for validation of these models using EHR data collected from different populations.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study is to perform a replication study to validate, evaluate, and identify the strengths and weaknesses of replicating a predictive model that employed multiple logistic regression with EHR data to forecast the levels of HbA<sub>1c</sub>. The original study used data from a population in the United States and this differentiated replication used a population in Saudi Arabia.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A total of 3 models were developed and compared with the model created in the original study. The models were trained and tested using a larger dataset from Saudi Arabia with 36,378 records. The 10-fold cross-validation approach was used for measuring the performance of the models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Applying the method employed in the original study achieved an accuracy of 74% to 75% when using the dataset collected from Saudi Arabia, compared with 77% obtained from using the population from the United States. The results also show a different ranking of importance for the predictors between the original study and the replication. The order of importance for the predictors with our population, from most to least important, is age, random blood sugar, estimated glomerular filtration rate, total cholesterol, non–high-density lipoprotein, and body mass index.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This replication study shows that direct use of the models (calculators) created using multiple logistic regression to predict the level of HbA<sub>1c</sub> may not be appropriate for all populations. This study reveals that the weighting of the predictors needs to be calibrated to the population used. However, the study does confirm that replicating the original study using a different population can help with predicting the levels of HbA<sub>1c</sub> by using the predictors that are routinely collected and stored in hospital EHR systems.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>glycated hemoglobin</kwd>
        <kwd>HbA<sub>1c</sub></kwd>
        <kwd>prediction</kwd>
        <kwd>electronic health records</kwd>
        <kwd>diabetes</kwd>
        <kwd>differentiated replication</kwd>
        <kwd>EHR</kwd>
        <kwd>hemoglobin</kwd>
        <kwd>logistic regression</kwd>
        <kwd>medical informatics</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Diabetes is a growing medical condition worldwide. Globally, the estimated number of diabetic patients in 2017 was 425 million, and it is expected to be more than 629 million by 2045, an increase of more than 48%. The number of people with borderline diabetes is also rapidly increasing. According to the International Diabetes Federation (IDF), there are 352 million people worldwide who are at risk of developing diabetes [<xref ref-type="bibr" rid="ref1">1</xref>]. The latest estimates indicate that 35.3% of the adults in the United Kingdom and the United States have prediabetes [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
      <p>Type 2 diabetes mellitus (T2DM) is the most common form of diabetes, accounting for 91% to 95% of all cases [<xref ref-type="bibr" rid="ref3">3</xref>]. T2DM is difficult to diagnose in its early stages because it does not have clear clinical symptoms. As a result of the slow development of its symptoms, it often stays undetected for a long time [<xref ref-type="bibr" rid="ref4">4</xref>]. The IDF estimates that half of people with diabetes do not know or feel that they are developing diabetes [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
      <p>Hemoglobin is responsible for transporting oxygen throughout the body’s cells and, when joined with the glucose within the blood, it forms glycated hemoglobin (HbA<sub>1c</sub>) [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. The International Expert Committee, with members from the American Diabetes Association (ADA), the European Association for the Study of Diabetes, and the International Diabetes Federation [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], recommends the use of the glycated hemoglobin test to identify adults with a high risk of diabetes [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>An elevation of HbA<sub>1c</sub> level in the blood can be related to chronic complications and lead to serious health conditions [<xref ref-type="bibr" rid="ref10">10</xref>]. Patients with HbA<sub>1c</sub> levels of 5.5% to 6.0% have a substantial risk of developing diabetes, increased by 25% compared with patients with HbA<sub>1c</sub> levels less than 5.5%. Furthermore, patients with HbA<sub>1c</sub> levels of more than 6.0% have a 50% chance of developing T2DM over the next 5 years. Those patients are at 20 or more times higher risk than patients who have a level of 5.0% or less [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      <p>A study by Huang et al [<xref ref-type="bibr" rid="ref12">12</xref>] showed that patients with HbA<sub>1c</sub> levels of 5.7% to 6.5% are likely to develop diabetes in 2.49 years. Not only that, but the trend of the HbA<sub>1c</sub> test has been shown to be an important factor for predicting mortality for patients with T2DM [<xref ref-type="bibr" rid="ref13">13</xref>]. Furthermore, nondiabetic people with an elevated HbA<sub>1c</sub> level have an increased risk of cardiovascular disease [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Hence, studies suggest that patients with and without diabetes with raised levels of HbA<sub>1c</sub> should be clinically checked and monitored as a preventive intervention to avoid developing T2DM or cardiovascular diseases [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      <p>Many studies have investigated the correlation between HbA<sub>1c</sub> and clinical variables using statistical and mathematical approaches [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. However, we are not aware of any that have performed replications of the predictive models on different populations. In this paper, we investigate building statistical models that predict the probability of patients having an elevated level of HbA<sub>1c</sub>. We employ comparative statistical models similar to the models used by Wells et al [<xref ref-type="bibr" rid="ref2">2</xref>] and apply them to a larger electronic health record (EHR) dataset collected from King Abdullah International Medical Research Center (KAIMRC) [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>] in Saudi Arabia.</p>
      <p>The work by Wells et al [<xref ref-type="bibr" rid="ref2">2</xref>], which we refer to in this paper as the original study, focused on predicting the level of HbA<sub>1c</sub> for patients who were not previously diagnosed with diabetes or taking diabetes medications. The data were extracted from the EHR database of Wake Forest Baptist Medical Center in the United States. The authors applied a multiple logistic regression model to create a mathematical equation for calculating the level of HbA<sub>1c</sub> (≥5.7%). The predictors used in the equation were chosen from a list of theoretically associated hyperglycemia variables (laboratory measurements, medication categories, diagnosis, vital signs, demographics, family history, and social history variables). After reducing the model’s variables using Harrell’s model approximation method [<xref ref-type="bibr" rid="ref22">22</xref>] and removing variables that caused collinearity, the final equation associated 8 independent variables with the result of the HbA<sub>1c</sub> blood test. Restricted cubic splines (RCS) with 3 knots were used for fitting the continuous predictors into the model [<xref ref-type="bibr" rid="ref2">2</xref>]. The calculator achieved an accuracy of 77%.</p>
      <p>The independent replication of empirical studies is widely regarded as being an essential underpinning of the scientific paradigm. Successful replication of a study by other researchers is considered to be an important step in verifying the original findings and helping to determine how widely they apply.</p>
      <p>While the vocabulary associated with replication varies across disciplines [<xref ref-type="bibr" rid="ref23">23</xref>], the terms employed by Lindsay and Ehrenberg [<xref ref-type="bibr" rid="ref24">24</xref>] appear to be widely used and recognized, so they will be used in this paper. Lindsay and Ehrenberg categorize replication studies as either (1) close replications or (2) differentiated replications.</p>
      <p>First, a close replication seeks to repeat the original study in a way that keeps all the “known conditions of the study the same or very similar” [<xref ref-type="bibr" rid="ref24">24</xref>]. Hence, such a study employs the same forms of measurement, sampling, and analysis as the original, while also seeking to keep the profile of any set of participants as close to the original as possible. A close replication aims to test the hypothesis that, when a given study is repeated under the same experimental conditions as the original study, it should produce the same (or nearly the same) result.</p>
      <p>Second, a differentiated replication introduces known variations into what Lindsay and Ehrenberg term “fairly major aspects of the conditions of the study” [<xref ref-type="bibr" rid="ref24">24</xref>]. Differentiated replications provide a test of how widely the original findings can be generalized, their scope, and the conditions under which they may not hold. For a differentiated replication, therefore, it is expected that some changes in the outcomes are likely to arise, and the question of interest is to what extent and in what form these outcome changes occur.</p>
      <p>In an ideal situation, one or more close replications would be used to validate the findings of an original study, followed by a set of differentiated replications used to scope out the extent of their validity by varying different conditions.</p>
      <p>For any replication study, it is possible to vary one or more of the factors that characterize the way that the study was performed. These may include the team performing the replication, the analysis process, the type of data employed, and the population from which the data were derived. As this study involves analyzing data collected from a human population rather than conducting an experiment or trial, we can expect that using a different team to perform a replication should have no effect. Hence, for a close replication it would be appropriate to use the same analysis tool with EHRs of the same form as used in the original study, but pertaining to a different sample of participants drawn from the same general population used in the original study.</p>
      <p>For the differentiated replication reported here, we have used the same form of analysis, but have applied this to a set of EHRs that were derived from a different population. The differences between the forms of the EHRs constituted one difference, but these differences were relatively small. The main difference in the studies arose from the population used. As with the original study, the selection of participants was largely driven by availability. We therefore expected that it was quite possible that there would be some differences in the outcomes, and our main goal was to investigate the extent and form of those differences.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Conduct of the Replication Study</title>
        <p>The KAIMRC dataset was collected by the Ministry of National Guard Health Affairs from the EHR systems of National Guard Hospitals in Saudi Arabia for the period from 2016 to the end of 2018. The dataset was then labelled according to the ADA guidelines. Patients with an HbA<sub>1c</sub> level of 5.7% or more are considered to have an elevated HbA<sub>1c</sub> and those with lower levels than that are considered normal. The predictors that were selected by the authors of the original study for calculating the level of HbA<sub>1c</sub>, listed in <xref ref-type="table" rid="table1">Table 1</xref>, were employed in this study, except for race and smoking status. Taking into account that most of the data samples in the KAIMRC dataset are from the same race, the race variable can be omitted, as it has zero variance [<xref ref-type="bibr" rid="ref25">25</xref>]. Smoking status information is absent from the KAIMRC dataset. However, in the original model used by Wells et al, this was ranked as having the lowest importance of all the predictors. The body mass index (BMI) and non–high-density lipoprotein measures were also absent. However, both can be calculated by using the formulae presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Predictors available in the original study versus King Abdullah International Medical Research Center datasets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="bottom">
                <td>Predictors</td>
                <td>Original study dataset</td>
                <td>KAIMRC<sup>a</sup> dataset</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Age</td>
                <td>√</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Body mass index</td>
                <td>√</td>
                <td>√ (calculated)</td>
              </tr>
              <tr valign="top">
                <td>Estimated glomerular filtration rate</td>
                <td>√</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Random blood sugar (glucose) level</td>
                <td>√</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Non–high-density lipoprotein</td>
                <td>√</td>
                <td>√ (calculated)</td>
              </tr>
              <tr valign="top">
                <td>Total cholesterol</td>
                <td>√</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Race</td>
                <td>√</td>
                <td>x</td>
              </tr>
              <tr valign="top">
                <td>Smoking status</td>
                <td>√</td>
                <td>x</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>KAIMRC: King Abdullah International Medical Research Center, Saudi Arabia.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In this study we followed the same sampling approach used in the original study. For inpatient visits, only the first day’s data were considered, and in cases of missing values, the first available values for the visit were used. Samples for patients with values of &#60;1% for HbA<sub>1c</sub> were simply considered to be erroneous readings and were excluded. Similar to the original study, patients diagnosed with diabetes were eliminated from the development dataset (refer to <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for diabetes diagnostic codes). We avoided intensive interpretation for handling the missing values. Samples with one or more completely missing values were also excluded. This resulted in decreasing the dataset size from the 262,559 samples originally collected to 36,378 samples. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the detailed preprocessing tasks performed prior to building the statistical models.</p>
        <p>The descriptive statistics for the KAIMRC experimental dataset and the dataset used by Wells et al are shown in <xref ref-type="table" rid="table2">Table 2</xref>. The units used for recording lab tests can differ according to the laboratory guidelines followed by each country. The KAIMRC dataset uses different units than the ones used in the original study for some variables. For instance, the total cholesterol level is measured in milligrams per deciliter (mg/dL) in the original study’s dataset, and in millimoles per liter (mmol/L) in the dataset from the KAIMRC labs. Therefore, the descriptive statistics contain the values using both units. When developing the predictive models, the authors converted the units using the appropriate formulae (see <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). However, the conversion task can be avoided to reduce data preprocessing complexity, as it should not affect the prediction performance for the logistic regression models.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Dataset preprocessing details. HbA<sub>1c</sub>: glycated hemoglobin.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e18963_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Descriptive statistics for King Abdullah International Medical Research Center and original study datasets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="290"/>
            <col width="150"/>
            <col width="150"/>
            <col width="90"/>
            <col width="150"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Variables<sup>a</sup></td>
                <td colspan="3">KAIMRC<sup>b</sup> dataset</td>
                <td colspan="2">Original study<sup>c</sup> dataset</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>HbA<sub>1c</sub><sup>d</sup> &#60;5.7% (n=14,332)</td>
                <td>HbA<sub>1c</sub> ≥5.7% (n=22,046)</td>
                <td><italic>P</italic> value</td>
                <td>HbA<sub>1c</sub> &#60;5.7% (n=16,743)</td>
                <td>HbA<sub>1c</sub> ≥5.7% (n=5892)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Age (years), mean (SD)</td>
                <td>45.5 (17.01)</td>
                <td>60.5 (14.13)</td>
                <td>&#60;.001</td>
                <td>48.1 (15.4)</td>
                <td>54.8 (14.0)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">BMI (kg/m<sup>2</sup>), mean (SD)</td>
                <td>29.61 (10.74)</td>
                <td>31.50 (12.13)</td>
                <td>&#60;.001</td>
                <td>30.1 (7.44)</td>
                <td>33.0 (8.41)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">eGFR<sup>e</sup> (mL/min/1.73 m<sup>2</sup>), mean (SD)</td>
                <td>93.40 (35.19)</td>
                <td>82.02 (28.86)</td>
                <td>&#60;.001</td>
                <td>92.0 (33.0)</td>
                <td>87.9 (30.8)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>RBS<sup>f</sup></bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>&#60;.001</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RBS (mmol/L), mean (SD)</td>
                <td>5.47 (1.28)</td>
                <td>8.30 (4.30)</td>
                <td>
                  <break/>
                </td>
                <td>4.9 (0.7)</td>
                <td>5.3 (0.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RBS (mg/dL), mean (SD)</td>
                <td>98.5 (23.00)</td>
                <td>149.4 (77.47)</td>
                <td>
                  <break/>
                </td>
                <td>88.4 (12.7)</td>
                <td>96.1 (16.0)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Cholesterol</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>&#60;.001</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cholesterol (mmol/L), mean (SD)</td>
                <td>4.59 (1.19)</td>
                <td>4.17 (1.16)</td>
                <td>
                  <break/>
                </td>
                <td>4.80 (1.01)</td>
                <td>4.96 (1.11)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cholesterol (mg/dL), mean (SD)</td>
                <td>177.49 (46.01)</td>
                <td>161.25 (44.85)</td>
                <td>
                  <break/>
                </td>
                <td>186 (39.4)</td>
                <td>192 (43.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Non-HDL<sup>g</sup></bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>&#60;.001</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-HDL (mmol/L), mean (SD)</td>
                <td>2.85 (1.06)</td>
                <td>2.49 (0.99)</td>
                <td>
                  <break/>
                </td>
                <td>3.49 (0.96)</td>
                <td>3.72 (1.07)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-HDL (mg/dL), mean (SD)</td>
                <td>110.2 (40.99)</td>
                <td>96.28 (38.28)</td>
                <td>
                  <break/>
                </td>
                <td>135 (37.4)</td>
                <td>144 (41.7)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Refer to <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> for unit conversion formulae.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>KAIMRC: King Abdullah International Medical Research Center, Saudi Arabia.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>Wake Forest Baptist Medical Center, North Carolina, United States.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>HbA<sub>1c</sub>: glycated hemoglobin.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>eGFR: estimated glomerular filtration rate.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>RBS: random blood sugar.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>HDL: high-density lipoproteins.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Study Design</title>
        <p>A complete validation of Wells et al’s calculator using our dataset was not possible due to the absence of the smoking status variable. To validate the approach used in the original study, 3 predictive models (PMs) were built, trained, and tested using the KAIMRC dataset. All models employ multiple logistic regression to create the calculator by associating the chosen and available predictors. After discussion with the authors of the original study, we structured the models as PM1, PM2, and PM3.</p>
        <p>PM1 was designed to be as close as possible to the original study’s model. It uses the predictors chosen in the original study: age, BMI, random blood sugar (RBS), non–high-density lipoprotein (non-HDL), cholesterol, and estimated glomerular filtration rate (eGFR). The continuous predictors are fitted to the model using RCS with 3 knots.</p>
        <p>PM2 was designed using the same predictors used in PM1 but without RCS fitting.</p>
        <p>PM3 was designed after excluding the predictors with the least importance in PM1 and PM2, using a reduced number of predictors and fitted using RCS with 5 knots. The choice of the number of knots for this model was determined by using Stone’s recommendation [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        <p>The 3 models were validated using the 10-fold cross-validation approach. The measure used to evaluate and compare the results with the original study was the concordance statistic, which is equal to area under the receiver operating characteristic (AUR ROC) curve [<xref ref-type="bibr" rid="ref27">27</xref>]. To assist with future comparisons, we report measures commonly used for medical research, such as precision, recall, and F1, in the model evaluation. The data preparations are undertaken using Python (version 3.7; Python Software Foundation). The model building and the analysis are carried out in R (version 3.6.0; The R Foundation) using the regression modeling strategies package.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The development data subset size used for training, testing, and validating the models after data preprocessing was 36,378 samples. Most medical datasets are imbalanced with a majority normal population [<xref ref-type="bibr" rid="ref28">28</xref>], but 60.60% (22,046/36,378) of KAIMRC dataset patients were found to have elevated levels of HbA<sub>1c</sub> (≥5.7%), and 39.40% (14,332/36,378) of patients had a normal HbA<sub>1c</sub> level (&#60;5.7%).</p>
      <p>Details of the 3 models (PM1, PM2, and PM3) used for the purpose of validating and evaluating the original study are shown in <xref ref-type="table" rid="table3">Table 3</xref>. This study explores multiple logistic regression models using different numbers of variables, with and without RCS, and with different numbers of knots. PM1 (using a complete set of variables fitted using RCS) achieves an average accuracy of 73.67% and 95% CI of 74% to 77% with a well-calibrated curve. A similar model (PM2), but not fitted using RCS, shows improved accuracy, with an average accuracy of 74.04% and the same 95% CI of 74% to 77%. However, the calibration curve shows better calibration when applying RCS into the models, as shown in <xref rid="figure2" ref-type="fig">Figures 2</xref> and <xref rid="figure3" ref-type="fig">3</xref>.</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Performance of models for glycated hemoglobin elevation prediction.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="100"/>
          <col width="140"/>
          <col width="200"/>
          <col width="120"/>
          <col width="130"/>
          <col width="100"/>
          <col width="120"/>
          <col width="90"/>
          <thead>
            <tr valign="bottom">
              <td>Model</td>
              <td>Variables used</td>
              <td>Number of RCS<sup>a</sup> knots</td>
              <td>AUR ROC<sup>b</sup></td>
              <td>95% CI</td>
              <td>Recall</td>
              <td>Precision</td>
              <td>F1</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>PM<sup>c</sup>1</td>
              <td>Complete<sup>d</sup></td>
              <td>3</td>
              <td>73.67</td>
              <td>74.71-77.51</td>
              <td>85.24</td>
              <td>77.58</td>
              <td>81.23</td>
            </tr>
            <tr valign="top">
              <td>PM2</td>
              <td>Complete</td>
              <td>N/A<sup>e</sup></td>
              <td>74.04</td>
              <td>74.35-77.16</td>
              <td>82.18</td>
              <td>78.76</td>
              <td>80.43</td>
            </tr>
            <tr valign="top">
              <td>PM3</td>
              <td>Reduced<sup>f</sup></td>
              <td>5</td>
              <td>74.73</td>
              <td>75.38-78.15</td>
              <td>84.40</td>
              <td>78.80</td>
              <td>81.50</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>RCS: restricted cubic splines.</p>
          </fn>
          <fn id="table3fn2">
            <p><sup>b</sup>AUR ROC: area under the receiver operating characteristic.</p>
          </fn>
          <fn id="table3fn3">
            <p><sup>c</sup>PM: predictive model.</p>
          </fn>
          <fn id="table3fn4">
            <p><sup>d</sup>All variables (age, random blood sugar, cholesterol, non–high-density lipoproteins, estimated glomerular filtration rate, and BMI).</p>
          </fn>
          <fn id="table3fn5">
            <p><sup>e</sup>N/A: not applicable.</p>
          </fn>
          <fn id="table3fn6">
            <p><sup>f</sup>Reduced variables (age, random blood sugar, cholesterol, non–high-density lipoproteins, and estimated glomerular filtration rate).</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <fig id="figure2" position="float">
        <label>Figure 2</label>
        <caption>
          <p>The calibration curve for PM1. HbA<sub>1c</sub>: glycated hemoglobin. PM: predictive model.</p>
        </caption>
        <graphic xlink:href="medinform_v8i7e18963_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>The calibration curve for PM2. HbA<sub>1c</sub>: glycated hemoglobin. PM: predictive model.</p>
        </caption>
        <graphic xlink:href="medinform_v8i7e18963_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p><xref rid="figure4" ref-type="fig">Figure 4</xref> shows the ranking of importance for the variables used in the PM1 model. PM1 shows a different order of importance for the predictors than the order obtained from the original study. Age and RBS are of great importance in both studies. However, BMI is of the lowest importance when using the KAIMRC population, whereas in the original study it was ranked second.</p>
      <fig id="figure4" position="float">
        <label>Figure 4</label>
        <caption>
          <p>Order of importance of predictors for PM1. Chol: cholesterol. eGFR: estimated glomerular filtration rate. HDL: high-density lipoproteins. PM: predictive model. RBS: random blood sugar.</p>
        </caption>
        <graphic xlink:href="medinform_v8i7e18963_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>The PM3 model excludes the variable that showed the lowest importance, BMI. This model, when fitted using RCS with 5 knots, shows better performance using only the 5 predictors (age, RBS, cholesterol, eGFR, and non-HDL). The eGFR shows greater importance when fitted using RCS with 5 knots (&#62;0.05) than when fitted with 3 knots (&#60;0.05). The predictors’ importance order for PM3 is shown in <xref rid="figure5" ref-type="fig">Figure 5</xref>. PM3 achieves an average accuracy of 74.73%, with a better confidence interval (95% CI 75%-78%). The calibration curve for PM3 is identical to that of PM1.</p>
      <fig id="figure5" position="float">
        <label>Figure 5</label>
        <caption>
          <p>Order of importance of predictors for PM3. Chol: cholesterol. eGFR: estimated glomerular filtration rate. HDL: high-density lipoproteins. PM: predictive model. RBS: random blood sugar.</p>
        </caption>
        <graphic xlink:href="medinform_v8i7e18963_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>When using the PM2 model, the results show agreement with the results from PM1 for 93.27% (33,929/36,378) of predictions. The PM3 model with fewer predictors achieves a better performance and a similar percentage of predictions that are in agreement with the output from PM1 (33,937/36,378, 93.29%). Furthermore, the results show a strong degree of correlation among the probability outputs produced by the 3 models (<italic>r</italic>=0.97).</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>Applying the method employed in the original study achieved an accuracy of 73% to 74% using a dataset collected from the Middle East, compared with 77% obtained from using a population from the United States in the original study. The findings from this replication study therefore confirm the conclusion from the original study that this form of modeling can help with predicting the levels of HbA<sub>1c</sub> in a blood test for nondiabetic patients using predictors extracted from EHR systems.</p>
        <p>The order of importance obtained for the predictors used by the multiple logistic regression on our dataset is different from the order of importance produced in the original study. The order for the predictors using the KAIMRC dataset, from the most to the least importance, is RBS, age, eGFR, cholesterol, non-HDL, and BMI. <xref ref-type="table" rid="table4">Table 4</xref> shows the importance rankings for the predictors obtained from the original study, as well as the rankings obtained from the 3 models used in this study.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Predictor importance rankings.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="210"/>
            <col width="70"/>
            <col width="70"/>
            <col width="90"/>
            <col width="110"/>
            <col width="110"/>
            <col width="120"/>
            <col width="70"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Study</td>
                <td>1st</td>
                <td>2nd</td>
                <td>3rd</td>
                <td>4th</td>
                <td>5th</td>
                <td>6th</td>
                <td>7th</td>
                <td>8th</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="bottom">
                <td colspan="2">Original study</td>
                <td>Age</td>
                <td>BMI</td>
                <td>RBS<sup>a</sup></td>
                <td>Race</td>
                <td>Non-HDL<sup>b</sup></td>
                <td>Cholesterol</td>
                <td>eGFR<sup>c</sup></td>
                <td>Smoking status</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Replication study</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>PM<sup>d</sup>1</td>
                <td>RBS</td>
                <td>Age</td>
                <td>Cholesterol</td>
                <td>Non-HDL</td>
                <td>eGFR</td>
                <td>BMI</td>
                <td>N/A<sup>e</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>PM2</td>
                <td>Age</td>
                <td>RBS</td>
                <td>Cholesterol</td>
                <td>Non-HDL</td>
                <td>BMI</td>
                <td>eGFR</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>PM3</td>
                <td>RBS</td>
                <td>Age</td>
                <td>eGFR</td>
                <td>Cholesterol</td>
                <td>Non-HDL</td>
                <td>BMI (excluded)</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>RBS: random blood sugar.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>HDL: high-density lipoproteins.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>eGFR: estimated glomerular filtration rate.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>PM: predictive model.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>BMI was one of the most important predictors in the population from the United States and demonstrated higher impact than the RBS and eGFR. However, it shows little importance for predicting the elevation level of HbA<sub>1c</sub> in the KAIMRC population. Indeed, the simpler calculator with a reduced number of variables (after excluding BMI) is able to achieve better prediction abilities (refer to <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> for details of the calculator). <xref rid="figure6" ref-type="fig">Figure 6</xref> summarizes the 10-fold performance achieved using the reported measures for all models, and reveals that there is a consistent prediction trend for PM3, especially in the AUR ROC, which shows little variation between the folds.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Box plots of the reported measures for the models. AUC ROC: area under the receiver operating characteristic. PM: predictive model.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e18963_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>This replication study shows that the ranking of the variables is largely based on the dataset and the model used for prediction. Variables with low importance in the prediction of HbA<sub>1c</sub> in one population may show greater or lesser importance when the model is applied on populations from different regions of the world. Interestingly, this can also happen when employing different predictive models and with different hyperparameters using the same population (for instance, eGFR shows higher importance when fitted to the model using RCS with 5 knots in PM3 than with 3 knots in PM1 and without RCS in PM2, as interpreted in <xref ref-type="table" rid="table4">Table 4</xref>).</p>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <p>We performed a differentiated replication using a population from a different region that was available to us. The 2 datasets have similar means and standard deviations for most of the variables, such as age, cholesterol, and non-HDL, as described in <xref ref-type="table" rid="table2">Table 2</xref>. However, there is a significant difference in the body mass index and random blood sugar variables, and the dispersion is large for both variables.</p>
        <p>The sample size and class balance affect the learning behavior of the models [<xref ref-type="bibr" rid="ref29">29</xref>]. The KAIMRC dataset is larger than the one used in the original study by 38%. The class balance is also different, with 26% of patients having elevated HbA<sub>1c</sub> (≥5.7%) and 74% with normal HbA<sub>1c</sub> (&#60;5.7%) in the original study compared with 60.60% (22,046/36,378) with elevated HbA<sub>1c</sub> (≥5.7%) and 39.40% (14,332/36,378) with normal HbA<sub>1c</sub> (&#60;5.7%) in the KAIMRC dataset.</p>
        <p>Although the population represented in this study is less heterogeneous with regard to ethnic groups, the size of the KAIMRC dataset is larger than the one used in the original study. The prevalence of diabetes is also larger, being a sample from the population of Saudi Arabia. In terms of prevalence of diabetes, Saudi Arabia was ranked by the World Health Organization as being the second highest in the Middle East and seventh highest in the world [<xref ref-type="bibr" rid="ref30">30</xref>], with an 18.3% diabetes prevalence rate, according to the IDF, compared with 10.5% in the United States [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        <p>In the original study, the model performance was compared with the models developed by Baan et al [<xref ref-type="bibr" rid="ref32">32</xref>] and Griffin et al [<xref ref-type="bibr" rid="ref33">33</xref>], which used different datasets [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. The main limitation in the comparison between the original study and the studies by Baan et al and Griffin et al is the absence of some variables that were used to create the calculators (refer to <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> for details about the variables used in the corresponding studies). The same situation applies to this study, as the smoking status variable is missing in the KAIMRC dataset. The smoking prevalence in Saudi Arabia is between 2.4% and 52.3% among different age groups [<xref ref-type="bibr" rid="ref36">36</xref>]. However, other missing predictors, such as genetic or lifestyle characteristics [<xref ref-type="bibr" rid="ref37">37</xref>], which are difficult to collect and incorporate into the EHR systems, may help to explain the high rate of elevated levels of HbA<sub>1c</sub> in the KAIMRC population.</p>
        <p>After eliminating the variables that do not show significant impact on the prediction of HbA<sub>1c</sub> in the KAIMRC population, the results indicate that different regions in the world can have different weightings of predictors for HbA<sub>1c</sub> when using the approach of Wells et al. Although there are many studies that have demonstrated the relationship between diabetes prevalence and BMI [<xref ref-type="bibr" rid="ref38">38</xref>], some studies have shown that the obesity prevalence in Asian countries does not relate to the diabetes prevalence. The risk of diabetes occurs in patients with a lower BMI in Asian countries compared with patients from European countries [<xref ref-type="bibr" rid="ref39">39</xref>]. The prevalence of obesity in Asian countries is substantially less than in the United States, but Asian countries have a similar or higher prevalence of diabetes [<xref ref-type="bibr" rid="ref40">40</xref>]. However, neither Yoon et al [<xref ref-type="bibr" rid="ref39">39</xref>] nor Hu [<xref ref-type="bibr" rid="ref40">40</xref>] identifies a relationship between nondiabetic patients with elevated levels of HbA<sub>1c</sub> and obesity. <xref rid="figure7" ref-type="fig">Figure 7</xref> visualizes the class distribution for the BMI variable for the KAIMRC dataset. The figure shows that elevation of HbA<sub>1c</sub> exists with similar rates between low and high obesity ranges.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>HbA<sub>1c</sub> elevation for BMI ranges of King Abdullah International Medical Research Center patients. HbA<sub>1c</sub>: glycated hemoglobin.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e18963_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Advanced data mining techniques, such as deep machine learning models, are capable of finding hidden and complex correlations in large input spaces and datasets [<xref ref-type="bibr" rid="ref41">41</xref>]. Recently, machine learning models have shown great success in many domains (eg, natural language processing, image segmentation, and object detection), but there is still a lack of studies that apply those models to the medical domain using EHR data [<xref ref-type="bibr" rid="ref42">42</xref>]. As stated in the original study, maintaining security and privacy for medical datasets is a challenging task. However, with advanced technologies in data privacy and protection, such as differential privacy and data anonymization techniques [<xref ref-type="bibr" rid="ref43">43</xref>], it should be possible to minimize the security risk.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Replication studies provide an invaluable contribution to the validation, generalization, and continuation of scientific research. The differentiated replication presented in this study is aimed at validating the calculator used for predicting HbA<sub>1c</sub> and evaluating the method used to create the mathematical equation by training the multiple logistic regression algorithm using EHR datasets. The evaluation was performed using a dataset collected from a different population. The original and replicated calculators employ associated predictors that are routinely collected and stored in hospital systems.</p>
        <p>As explained in the “Introduction” section, this differentiated replication study used the same method to analyze a different population sample, with some differences in the form of the EHRs. As a replication, it was intended to investigate what changed and did not change in the outcomes.</p>
        <p>What did not change appreciably was the accuracy of the results produced using this method, with an accuracy range of 73.6% to 74.7% in our study compared with 77% in the original study. The set of predictors (when these could be compared) also did not change. Thus, given that a close replication of the original study is unavailable, the differentiated replication does confirm that, despite the notable differences between the two datasets, the use of multiple logistic regression is able to provide good predictions of HbA<sub>1c</sub> elevation levels.</p>
        <p>What did change was the order of importance for the set of predictors used in the calculator. Thus, we can conclude that the use of multiple logistic regression for prediction does need to be tuned to the characteristics of the population being assessed. While we cannot wholly rule out the cause of this difference in importance being due to differences in the form of the EHRs, it seems more likely that the characteristics of the population were an important factor.</p>
        <p>In terms of the role of replication itself, we would argue that this study demonstrates that while there is little difference in prediction accuracy when using multiple logistic regression with different populations (as might be expected), the influence of the different elements in the set of predictors is different. Due to that, we would argue that the generalization of simple statistical predictive models (calculators) is inappropriate. We suggest that creating advanced predictive models that can learn complex relationships using large multidimensional datasets may be a better way to exploit the increasing volumes of EHR data becoming available. Hence, further work will investigate applying advanced machine learning techniques to predict the elevation of HbA<sub>1c</sub> using the KAIMRC dataset.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Formulae for the calculated variables.</p>
        <media xlink:href="medinform_v8i7e18963_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 45 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Lab test and diagnostic codes.</p>
        <media xlink:href="medinform_v8i7e18963_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 72 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Units conversion formulae.</p>
        <media xlink:href="medinform_v8i7e18963_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 73 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>PM3 Calculator details.</p>
        <media xlink:href="medinform_v8i7e18963_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 95 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Variables used in the studies.</p>
        <media xlink:href="medinform_v8i7e18963_app5.pdf" xlink:title="PDF File  (Adobe PDF File), 58 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADA</term>
          <def>
            <p>American Diabetes Association</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUR ROC</term>
          <def>
            <p>area under the receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">eGFR</term>
          <def>
            <p>estimated glomerular filtration rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HbA<sub>1c</sub></term>
          <def>
            <p>glycated hemoglobin</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">HDL</term>
          <def>
            <p>high-density lipoprotein</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">IDF</term>
          <def>
            <p>International Diabetes Federation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">KAIMRC</term>
          <def>
            <p>King Abdullah International Medical Research Center</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">PM</term>
          <def>
            <p>predictive model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">RBS</term>
          <def>
            <p>random blood sugar</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">RCS</term>
          <def>
            <p>restricted cubic splines</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">T2DM</term>
          <def>
            <p>type 2 diabetes mellitus</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to acknowledge the contribution of King Abdullah International Medical Research Center (KAIMRC) for providing the dataset under the approved projects “Diabetes Early Warning System, Research Protocol SP14/042,” “Finding the Common Related Diseases With Diabetes Using Data Mining Association Techniques, Research Protocol SP15/064,” and extension project number RYD-17-417780-187503 to collect the newest dataset. We would also like to acknowledge the contribution by Professor Pali Hungin for providing feedback about the clinical aspects of the study.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>ZA was responsible for designing, implementing, and building the statistical models. ZA and NAM were responsible for validating the models. ZA, DB, and NAM were responsible for the design of the replication study and for writing the manuscript. ZA and RA were responsible for extracting and describing the dataset. All authors participated in reviewing the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Karuranga</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>da Rocha Fernandes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ohlrogge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Malanda</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>IDF Diabetes Atlas: Global estimates of diabetes prevalence for 2017 and projections for 2045</article-title>
          <source>Diabetes Res Clin Pract</source>
          <year>2018</year>
          <month>04</month>
          <volume>138</volume>
          <fpage>271</fpage>
          <lpage>281</lpage>
          <pub-id pub-id-type="doi">10.1016/j.diabres.2018.02.023</pub-id>
          <pub-id pub-id-type="medline">29496507</pub-id>
          <pub-id pub-id-type="pii">S0168-8227(18)30203-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wells</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lenoir</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz-Garelli</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Futrell</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lockerman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pantalone</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Kattan</surname>
              <given-names>MW</given-names>
            </name>
          </person-group>
          <article-title>Predicting Current Glycated Hemoglobin Values in Adults: Development of an Algorithm From the Electronic Health Record</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>10</month>
          <day>22</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e10780</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2018/4/e10780/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10780</pub-id>
          <pub-id pub-id-type="medline">30348631</pub-id>
          <pub-id pub-id-type="pii">v6i4e10780</pub-id>
          <pub-id pub-id-type="pmcid">PMC6231807</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ogurtsova</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>da Rocha Fernandes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Linnenkamp</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Guariguata</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cavan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Makaroff</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>IDF Diabetes Atlas: Global estimates for the prevalence of diabetes for 2015 and 2040</article-title>
          <source>Diabetes Res Clin Pract</source>
          <year>2017</year>
          <month>06</month>
          <volume>128</volume>
          <fpage>40</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1016/j.diabres.2017.03.024</pub-id>
          <pub-id pub-id-type="medline">28437734</pub-id>
          <pub-id pub-id-type="pii">S0168-8227(17)30375-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beagley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guariguata</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Weil</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Motala</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Global estimates of undiagnosed diabetes in adults</article-title>
          <source>Diabetes Res Clin Pract</source>
          <year>2014</year>
          <month>02</month>
          <volume>103</volume>
          <issue>2</issue>
          <fpage>150</fpage>
          <lpage>160</lpage>
          <pub-id pub-id-type="doi">10.1016/j.diabres.2013.11.001</pub-id>
          <pub-id pub-id-type="medline">24300018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>K P</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlovich</surname>
              <given-names>J G</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Little</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>England</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>C M</given-names>
            </name>
          </person-group>
          <article-title>What is hemoglobin A1c? An analysis of glycated hemoglobins by electrospray ionization mass spectrometry</article-title>
          <source>Clin Chem</source>
          <year>1998</year>
          <month>09</month>
          <volume>44</volume>
          <issue>9</issue>
          <fpage>1951</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="medline">9732983</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koenig</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Saudek</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lehrman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cerami</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Correlation of Glucose Regulation and Hemoglobin AIc in Diabetes Mellitus</article-title>
          <source>N Engl J Med</source>
          <year>1976</year>
          <month>08</month>
          <day>19</day>
          <volume>295</volume>
          <issue>8</issue>
          <fpage>417</fpage>
          <lpage>420</lpage>
          <pub-id pub-id-type="doi">10.1056/nejm197608192950804</pub-id>
          <pub-id pub-id-type="medline">934240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>International Expert Committee</collab>
          </person-group>
          <article-title>International Expert Committee report on the role of the A1C assay in the diagnosis of diabetes</article-title>
          <source>Diabetes Care</source>
          <year>2009</year>
          <month>07</month>
          <volume>32</volume>
          <issue>7</issue>
          <fpage>1327</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19502545"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc09-9033</pub-id>
          <pub-id pub-id-type="medline">19502545</pub-id>
          <pub-id pub-id-type="pii">dc09-9033</pub-id>
          <pub-id pub-id-type="pmcid">PMC2699715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>American Diabetes Association</collab>
          </person-group>
          <article-title>Diagnosis and classification of diabetes mellitus</article-title>
          <source>Diabetes Care</source>
          <year>2010</year>
          <month>01</month>
          <volume>33 Suppl 1</volume>
          <fpage>S62</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20042775"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc10-S062</pub-id>
          <pub-id pub-id-type="medline">20042775</pub-id>
          <pub-id pub-id-type="pii">33/Supplement_1/S62</pub-id>
          <pub-id pub-id-type="pmcid">PMC2797383</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ackermann</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>DF</given-names>
            </name>
            <name name-style="western">
              <surname>Gregg</surname>
              <given-names>EW</given-names>
            </name>
          </person-group>
          <article-title>Identifying Adults at High Risk for Diabetes and Cardiovascular Disease Using Hemoglobin A1c</article-title>
          <source>American Journal of Preventive Medicine</source>
          <year>2011</year>
          <month>01</month>
          <volume>40</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2010.09.022</pub-id>
          <pub-id pub-id-type="medline">21146762</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bonora</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tuomilehto</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The pros and cons of diagnosing diabetes with A1C</article-title>
          <source>Diabetes Care</source>
          <year>2011</year>
          <month>05</month>
          <volume>34 Suppl 2</volume>
          <fpage>S184</fpage>
          <lpage>90</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21525453"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc11-s216</pub-id>
          <pub-id pub-id-type="medline">21525453</pub-id>
          <pub-id pub-id-type="pii">34/Supplement_2/S184</pub-id>
          <pub-id pub-id-type="pmcid">PMC3632159</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gregg</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>DF</given-names>
            </name>
            <name name-style="western">
              <surname>Barker</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>McKeever Bullard</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Imperatore</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Albright</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>Response to Comment on: Zhang et al. A1C Level and Future Risk of Diabetes: A Systematic Review. Diabetes Care 2010;33:1665-1673</article-title>
          <source>Diabetes Care</source>
          <year>2011</year>
          <month>01</month>
          <day>26</day>
          <volume>34</volume>
          <issue>2</issue>
          <fpage>e21</fpage>
          <lpage>e21</lpage>
          <pub-id pub-id-type="doi">10.2337/dc10-2155</pub-id>
          <pub-id pub-id-type="medline">20587727</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Iqbal</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Clinciu</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>YE</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jian</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Using hemoglobin A1C as a predicting model for time interval from pre-diabetes progressing to diabetes</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>e104263</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0104263"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0104263</pub-id>
          <pub-id pub-id-type="medline">25093755</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-54250</pub-id>
          <pub-id pub-id-type="pmcid">PMC4122428</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hsia</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Variability in hemoglobin A1c predicts all-cause mortality in patients with type 2 diabetes</article-title>
          <source>J Diabetes Complications</source>
          <year>2012</year>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>296</fpage>
          <lpage>300</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jdiacomp.2012.03.028</pub-id>
          <pub-id pub-id-type="medline">22626873</pub-id>
          <pub-id pub-id-type="pii">S1056-8727(12)00068-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khaw</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wareham</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bingham</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Luben</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Welch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Association of hemoglobin A1c with cardiovascular disease and mortality in adults: the European prospective investigation into cancer in Norfolk</article-title>
          <source>Ann Intern Med</source>
          <year>2004</year>
          <month>09</month>
          <day>21</day>
          <volume>141</volume>
          <issue>6</issue>
          <fpage>413</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-141-6-200409210-00006</pub-id>
          <pub-id pub-id-type="medline">15381514</pub-id>
          <pub-id pub-id-type="pii">141/6/413</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Rifai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Buring</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Ridker</surname>
              <given-names>PM</given-names>
            </name>
          </person-group>
          <article-title>Hemoglobin A1c predicts diabetes but not cardiovascular disease in nondiabetic women</article-title>
          <source>Am J Med</source>
          <year>2007</year>
          <month>08</month>
          <volume>120</volume>
          <issue>8</issue>
          <fpage>720</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17679132"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.amjmed.2007.03.022</pub-id>
          <pub-id pub-id-type="medline">17679132</pub-id>
          <pub-id pub-id-type="pii">S0002-9343(07)00452-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC2585540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCarter</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hempe</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Chalew</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Mean blood glucose and biological variation have greater influence on HbA1c levels than glucose instability: an analysis of data from the Diabetes Control and Complications Trial</article-title>
          <source>Diabetes Care</source>
          <year>2006</year>
          <month>02</month>
          <volume>29</volume>
          <issue>2</issue>
          <fpage>352</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.2337/diacare.29.02.06.dc05-1594</pub-id>
          <pub-id pub-id-type="medline">16443886</pub-id>
          <pub-id pub-id-type="pii">29/2/352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nathan</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Kuenen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Borg</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schoenfeld</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Heine</surname>
              <given-names>RJ</given-names>
            </name>
            <collab>A1c-Derived Average Glucose Study Group</collab>
          </person-group>
          <article-title>Translating the A1C assay into estimated average glucose values</article-title>
          <source>Diabetes Care</source>
          <year>2008</year>
          <month>08</month>
          <volume>31</volume>
          <issue>8</issue>
          <fpage>1473</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18540046"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc08-0545</pub-id>
          <pub-id pub-id-type="medline">18540046</pub-id>
          <pub-id pub-id-type="pii">dc08-0545</pub-id>
          <pub-id pub-id-type="pmcid">PMC2742903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kazemi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hosseini</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bahrampour</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Faghihimani</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Amini</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Predicting of trend of hemoglobin a1c in type 2 diabetes: a longitudinal linear mixed model</article-title>
          <source>Int J Prev Med</source>
          <year>2014</year>
          <month>10</month>
          <volume>5</volume>
          <issue>10</issue>
          <fpage>1274</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25400886"/>
          </comment>
          <pub-id pub-id-type="medline">25400886</pub-id>
          <pub-id pub-id-type="pmcid">PMC4223947</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ketchell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Markova</surname>
              <given-names>Tsveti</given-names>
            </name>
          </person-group>
          <article-title>Clinical inquiries. Does daily monitoring of blood glucose predict hemoglobin A1c levels?</article-title>
          <source>J Fam Pract</source>
          <year>2003</year>
          <month>06</month>
          <volume>52</volume>
          <issue>6</issue>
          <fpage>485</fpage>
          <lpage>90</lpage>
          <pub-id pub-id-type="medline">12791231</pub-id>
          <pub-id pub-id-type="pii">jfp_0603_5206n</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhassan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Budgen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Alessa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alshammari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Daghstani</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Al Moubayed</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Collaborative Denoising Autoencoder for High Glycated Haemoglobin Prediction</article-title>
          <year>2019</year>
          <conf-name>International Conference on Artificial Neural Networks</conf-name>
          <conf-date>Sep 17-19, 2019</conf-date>
          <conf-loc>Munich, Germany</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-30493-5_34</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhassan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Budgen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Alshammari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Daghstani</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>McGough</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al Moubayed</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Stacked Denoising Autoencoders for Mortality Risk Prediction Using Imbalanced Clinical Data</article-title>
          <year>2018</year>
          <conf-name>17th IEEE International Conference on Machine Learning and Applications (ICMLA)</conf-name>
          <conf-date>Dec 17-20, 2018</conf-date>
          <conf-loc>Orlando, FL</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icmla.2018.00087</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harrell</surname>
              <given-names>F E</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K L</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>D B</given-names>
            </name>
          </person-group>
          <article-title>Multivariable prognostic models: issues in developing models, evaluating assumptions and adequacy, and measuring and reducing errors</article-title>
          <source>Stat Med</source>
          <year>1996</year>
          <month>02</month>
          <day>28</day>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>361</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.1002/(SICI)1097-0258(19960229)15:4&#60;361::AID-SIM168&#62;3.0.CO;2-4</pub-id>
          <pub-id pub-id-type="medline">8668867</pub-id>
          <pub-id pub-id-type="pii">10.1002/(SICI)1097-0258(19960229)15:4&#60;361::AID-SIM168&#62;3.0.CO;2-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gómez</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Juristo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Vegas</surname>
              <given-names>S</given-names>
            </name>
            <collab>editors</collab>
          </person-group>
          <article-title>Replications types in experimental disciplines</article-title>
          <year>2010</year>
          <conf-name>ACM-IEEE International Symposium on Empirical Software Engineering and Measurement</conf-name>
          <conf-date>Sep 16-17, 2010</conf-date>
          <conf-loc>Bolzano-Bozen, Italy</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1852786.1852790</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lindsay</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Ehrenberg</surname>
              <given-names>ASC</given-names>
            </name>
          </person-group>
          <article-title>The Design of Replicated Studies</article-title>
          <source>The American Statistician</source>
          <year>1993</year>
          <month>08</month>
          <volume>47</volume>
          <issue>3</issue>
          <fpage>217</fpage>
          <pub-id pub-id-type="doi">10.2307/2684982</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <source>Applied Predictive Modeling</source>
          <year>2013</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer-Verlag</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>[Generalized Additive Models]: Comment</article-title>
          <source>Statist Sci</source>
          <year>1986</year>
          <month>08</month>
          <volume>1</volume>
          <issue>3</issue>
          <fpage>312</fpage>
          <lpage>314</lpage>
          <pub-id pub-id-type="doi">10.1214/ss/1177013607</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Austin</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
          </person-group>
          <article-title>Interpreting the concordance statistic of a logistic regression model: relation to the variance and odds ratio of a continuous explanatory variable</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2012</year>
          <month>06</month>
          <day>20</day>
          <volume>12</volume>
          <fpage>82</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/1471-2288-12-82"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2288-12-82</pub-id>
          <pub-id pub-id-type="medline">22716998</pub-id>
          <pub-id pub-id-type="pii">1471-2288-12-82</pub-id>
          <pub-id pub-id-type="pmcid">PMC3528632</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saito</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rehmsmeier</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>e0118432</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0118432"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0118432</pub-id>
          <pub-id pub-id-type="medline">25738806</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-26790</pub-id>
          <pub-id pub-id-type="pmcid">PMC4349800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>GEAPA</given-names>
            </name>
            <name name-style="western">
              <surname>Prati</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Monard</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>A study of the behavior of several methods for balancing machine learning training data</article-title>
          <source>SIGKDD Explor Newsl</source>
          <year>2004</year>
          <month>06</month>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>20</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.1145/1007730.1007735</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al Dawish</surname>
              <given-names>Mohamed Abdulaziz</given-names>
            </name>
            <name name-style="western">
              <surname>Robert</surname>
              <given-names>Asirvatham Alwin</given-names>
            </name>
            <name name-style="western">
              <surname>Braham</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Al Hayek</surname>
              <given-names>Ayman Abdallah</given-names>
            </name>
            <name name-style="western">
              <surname>Al Saeed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>Rania Ahmed</given-names>
            </name>
            <name name-style="western">
              <surname>Al Sabaan</surname>
              <given-names>Fahad Sulaiman</given-names>
            </name>
          </person-group>
          <article-title>Diabetes Mellitus in Saudi Arabia: A Review of the Recent Literature</article-title>
          <source>Curr Diabetes Rev</source>
          <year>2016</year>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>359</fpage>
          <lpage>368</lpage>
          <pub-id pub-id-type="doi">10.2174/1573399811666150724095130</pub-id>
          <pub-id pub-id-type="medline">26206092</pub-id>
          <pub-id pub-id-type="pii">CDR-EPUB-69030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Centers for Disease Control and Prevention</collab>
          </person-group>
          <article-title>National Diabetes Statistics Report, 2020</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2020</year>
          <access-date>2020-06-22</access-date>
          <publisher-loc>Atlanta, GA</publisher-loc>
          <publisher-name>Centers for Disease Control and Prevention, US Department of Health and Human Services</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/diabetes/pdfs/data/statistics/national-diabetes-statistics-report.pdf">https://www.cdc.gov/diabetes/pdfs/data/statistics/national-diabetes-statistics-report.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baan</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Ruige</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Stolk</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Witteman</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Dekker</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Heine</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Feskens</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Performance of a predictive model to identify undiagnosed diabetes in a health care setting</article-title>
          <source>Diabetes Care</source>
          <year>1999</year>
          <month>02</month>
          <volume>22</volume>
          <issue>2</issue>
          <fpage>213</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://care.diabetesjournals.org/cgi/pmidlookup?view=long&#38;pmid=10333936"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/diacare.22.2.213</pub-id>
          <pub-id pub-id-type="medline">10333936</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Little</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Hales</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Kinmonth</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Wareham</surname>
              <given-names>NJ</given-names>
            </name>
          </person-group>
          <article-title>Diabetes risk score: towards earlier detection of type 2 diabetes in general practice</article-title>
          <source>Diabetes Metab Res Rev</source>
          <year>2000</year>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>164</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1002/1520-7560(200005/06)16:3&#60;164::aid-dmrr103&#62;3.0.co;2-r</pub-id>
          <pub-id pub-id-type="medline">10867715</pub-id>
          <pub-id pub-id-type="pii">10.1002/1520-7560(200005/06)16:3&#60;164::AID-DMRR103&#62;3.0.CO;2-R</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>DRR</given-names>
            </name>
            <name name-style="western">
              <surname>Wareham</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Byrne</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>PMS</given-names>
            </name>
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Hales</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Palmer</surname>
              <given-names>CR</given-names>
            </name>
          </person-group>
          <article-title>Undiagnosed glucose intolerance in the community: the Isle of Ely Diabetes Project</article-title>
          <source>Diabet Med</source>
          <year>1995</year>
          <month>01</month>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>30</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1464-5491.1995.tb02058.x</pub-id>
          <pub-id pub-id-type="medline">7712700</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kinmonth</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Spiegal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Woodcock</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Developing a training programme in patient-centred consulting for evaluation in a randomised controlled trial; diabetes care from diagnosis in British primary care</article-title>
          <source>Patient Educ Couns</source>
          <year>1996</year>
          <month>10</month>
          <volume>29</volume>
          <issue>1</issue>
          <fpage>75</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.1016/0738-3991(96)00936-6</pub-id>
          <pub-id pub-id-type="medline">9006224</pub-id>
          <pub-id pub-id-type="pii">0738399196009366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bassiony</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Smoking in Saudi Arabia</article-title>
          <source>Saudi Med J</source>
          <year>2009</year>
          <month>07</month>
          <volume>30</volume>
          <issue>7</issue>
          <fpage>876</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="medline">19617999</pub-id>
          <pub-id pub-id-type="pii">20081133</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elhadd</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Amoudi</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Alzahrani</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology, clinical and complications profile of diabetes in Saudi Arabia: a review</article-title>
          <source>Ann Saudi Med</source>
          <year>2007</year>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>241</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.annsaudimed.net/doi/full/10.5144/0256-4947.2007.241?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.5144/0256-4947.2007.241</pub-id>
          <pub-id pub-id-type="medline">17684435</pub-id>
          <pub-id pub-id-type="pii">07-204</pub-id>
          <pub-id pub-id-type="pmcid">PMC6074292</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boffetta</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>McLerran</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Inoue</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sinha</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Tsugane</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Irie</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Tamakoshi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tsuji</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kuriyama</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matsuo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Satoh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ahsan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pednekar</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Sasazuki</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sairenchi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nagai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nishino</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Koh</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Thornquist</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rolland</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Potter</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Body mass index and diabetes in Asia: a cross-sectional pooled analysis of 900,000 individuals in the Asia cohort consortium</article-title>
          <source>PLoS One</source>
          <year>2011</year>
          <volume>6</volume>
          <issue>6</issue>
          <fpage>e19930</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0019930"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0019930</pub-id>
          <pub-id pub-id-type="medline">21731609</pub-id>
          <pub-id pub-id-type="pii">10-PONE-RA-19851</pub-id>
          <pub-id pub-id-type="pmcid">PMC3120751</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmet</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Son</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Epidemic obesity and type 2 diabetes in Asia</article-title>
          <source>Lancet</source>
          <year>2006</year>
          <month>11</month>
          <day>11</day>
          <volume>368</volume>
          <issue>9548</issue>
          <fpage>1681</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(06)69703-1</pub-id>
          <pub-id pub-id-type="medline">17098087</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(06)69703-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>FB</given-names>
            </name>
          </person-group>
          <article-title>Globalization of diabetes: the role of diet, lifestyle, and genes</article-title>
          <source>Diabetes Care</source>
          <year>2011</year>
          <month>06</month>
          <volume>34</volume>
          <issue>6</issue>
          <fpage>1249</fpage>
          <lpage>57</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21617109"/>
          </comment>
          <pub-id pub-id-type="doi">10.2337/dc11-0442</pub-id>
          <pub-id pub-id-type="medline">21617109</pub-id>
          <pub-id pub-id-type="pii">34/6/1249</pub-id>
          <pub-id pub-id-type="pmcid">PMC3114340</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wischmeyer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rademacher</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <source>Regulating Artificial Intelligence</source>
          <year>2020</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harerimana</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Deep Learning for Electronic Health Records Analytics</article-title>
          <source>IEEE Access</source>
          <year>2019</year>
          <volume>7</volume>
          <fpage>101245</fpage>
          <lpage>101259</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2019.2928363</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abadi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>McMahan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Mironov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Talwar</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Deep learning with differential privacy</article-title>
          <year>2016</year>
          <conf-name>Proceedings of the 2016 ACM SIGSAC Conference on Computer and Communications Security</conf-name>
          <conf-date>Oct 24-28, 2016</conf-date>
          <conf-loc>Vienna, Austria</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2976749.2978318</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
