<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i5e25237</article-id>
      <article-id pub-id-type="pmid">34028357</article-id>
      <article-id pub-id-type="doi">10.2196/25237</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Improving Current Glycated Hemoglobin Prediction in Adults: Use of Machine Learning Algorithms With Electronic Health Records</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Veeranki</surname>
            <given-names>Sai</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Agakov</surname>
            <given-names>Felix</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Doogan</surname>
            <given-names>Caitlin</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Alhassan</surname>
            <given-names>Zakhriya</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6644-1656</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Watson</surname>
            <given-names>Matthew</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6375-3905</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Budgen</surname>
            <given-names>David</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7143-0241</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Alshammari</surname>
            <given-names>Riyad</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0529-2458</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Alessa</surname>
            <given-names>Ali</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0426-7445</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Al Moubayed</surname>
            <given-names>Noura</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>Durham University</institution>
            <addr-line>Mountjoy Centre</addr-line>
            <addr-line>Durham, DH1 3LE</addr-line>
            <country>United Kingdom</country>
            <phone>44 1913 341724 ext 41749</phone>
            <email>noura.al-moubayed@durham.ac.uk</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8942-355X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>Durham University</institution>
        <addr-line>Durham</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>College of Computer Science and Engineering</institution>
        <institution>University of Jeddah</institution>
        <addr-line>Jeddah</addr-line>
        <country>Saudi Arabia</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>National Center for Artificial Intelligence</institution>
        <institution>Saudi Data and Artificial Intelligence Authority</institution>
        <addr-line>Riyadh</addr-line>
        <country>Saudi Arabia</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Information Technology Programs</institution>
        <institution>Institute of Public Administration</institution>
        <addr-line>Riyadh</addr-line>
        <country>Saudi Arabia</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Noura Al Moubayed <email>noura.al-moubayed@durham.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>24</day>
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>5</issue>
      <elocation-id>e25237</elocation-id>
      <history>
        <date date-type="received">
          <day>23</day>
          <month>10</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>5</day>
          <month>1</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>22</day>
          <month>4</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Zakhriya Alhassan, Matthew Watson, David Budgen, Riyad Alshammari, Ali Alessa, Noura Al Moubayed. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 24.05.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/5/e25237" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Predicting the risk of glycated hemoglobin (HbA<sub>1c</sub>) elevation can help identify patients with the potential for developing serious chronic health problems, such as diabetes. Early preventive interventions based upon advanced predictive models using electronic health records data for identifying such patients can ultimately help provide better health outcomes.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Our study investigated the performance of predictive models to forecast HbA<sub>1c</sub> elevation levels by employing several machine learning models. We also examined the use of patient electronic health record longitudinal data in the performance of the predictive models. Explainable methods were employed to interpret the decisions made by the black box models.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study employed multiple logistic regression, random forest, support vector machine, and logistic regression models, as well as a deep learning model (multilayer perceptron) to classify patients with normal (&#60;5.7%) and elevated (≥5.7%) levels of HbA<sub>1c</sub>. We also integrated current visit data with historical (longitudinal) data from previous visits. Explainable machine learning methods were used to interrogate the models and provide an understanding of the reasons behind the decisions made by the models. All models were trained and tested using a large data set from Saudi Arabia with 18,844 unique patient records.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The machine learning models achieved promising results for predicting current HbA<sub>1c</sub> elevation risk. When coupled with longitudinal data, the machine learning models outperformed the multiple logistic regression model used in the comparative study. The multilayer perceptron model achieved an area under the receiver operating characteristic curve of 83.22% when used with historical data. All models showed a close level of agreement on the contribution of random blood sugar and age variables with and without longitudinal data.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study shows that machine learning models can provide promising results for the task of predicting current HbA<sub>1c</sub> levels (≥5.7% or &#60;5.7%). Using patients’ longitudinal data improved the performance and affected the relative importance of the predictors used. The models showed results that are consistent with comparable studies.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>glycated hemoglobin HbA<sub>1c</sub></kwd>
        <kwd>prediction</kwd>
        <kwd>machine learning</kwd>
        <kwd>deep learning</kwd>
        <kwd>neural network</kwd>
        <kwd>multilayer perceptron</kwd>
        <kwd>electronic health records</kwd>
        <kwd>time series data</kwd>
        <kwd>longitudinal data</kwd>
        <kwd>diabetes</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The level of glycated hemoglobin (HbA<sub>1c</sub>) is used to measure the average glucose concentration in red blood cells [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Unlike other glucose blood tests, such as random blood sugar (RBS) and fasting blood sugar (FBS), HbA<sub>1c</sub> provides a long-term measure of a patient’s blood glucose levels [<xref ref-type="bibr" rid="ref3">3</xref>]. The HbA<sub>1c</sub> test can therefore provide physicians with a reliable means of monitoring a patient’s hyperglycemia without requiring the patient to undertake overnight fasting prior to being tested.</p>
        <p>A concentration of 6.5% for the HbA<sub>1c</sub> in patient blood is considered as the cutoff point for the diagnosis of diabetes [<xref ref-type="bibr" rid="ref4">4</xref>]. However, patients with a concentration of less than 6.5% are not completely excluded from a diabetes diagnosis, as the range of elevation levels (5.7%≤ HbA<sub>1c</sub> &#60;6.5%) can indicate the future onset of diabetes. Therefore, HbA<sub>1c</sub> can act as an early predictor for the potential development of type-2 diabetes mellitus (T2DM) [<xref ref-type="bibr" rid="ref2">2</xref>]. Ackermann et al [<xref ref-type="bibr" rid="ref3">3</xref>] suggested using the HbA<sub>1c</sub> test as a measure for identifying those adults who are at a greater risk of developing T2DM in the future.</p>
        <p>Research has shown that reducing HbA<sub>1c</sub> levels can significantly reduce the possibility of developing serious complications. Hence, close monitoring of HbA<sub>1c</sub> levels is recommended for all diabetic patients and those with the potential for developing diabetes [<xref ref-type="bibr" rid="ref5">5</xref>]. It is also suggested that diabetic and nondiabetic patients with raised HbA<sub>1c</sub> levels should be clinically checked and monitored as a preventive intervention to avoid developing T2DM [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>Currently, the clinical data collected from patient visits consists of a set of readings for vital signs and lab tests, diagnoses, physicians’ notes, and treatments that are stored in electronic health records (EHRs). These are collected on an irregular basis, according to clinical needs, and stored with an associated time stamp.</p>
        <p>In recent years, machine learning models have shown powerful capabilities for analyzing and understanding complex data across a wide variety of applications. Our research question for this study was as follows: “Can HbA<sub>1c</sub> prediction be improved by using machine learning with longitudinal data that are normally available in EHR systems?”</p>
        <p>This paper reports an investigation into the performance of machine learning models to predict current HbA<sub>1c</sub> levels as a binary classification problem using EHR data. Nondiabetic patients with an HbA<sub>1c</sub> level of 5.7% or more are considered to have an elevated HbA<sub>1c</sub>, while those with levels lower than this are considered normal. The models combine current visit data with extra features (independent variables) extracted from previous visits by patients. We used explainable methods to rank the features in order of their importance to the decision made by each of the models. To the best of our knowledge, this study is the first to employ machine learning models that use longitudinal data from EHR systems for the purpose of HbA<sub>1c</sub> elevation risk prediction. This study is also the first to use explainable machine learning techniques to explain the classification decisions made by black box models, namely support vector machine (SVM) and multilayer perceptron (MLP), in predicting HbA<sub>1c</sub> elevation risk (≥5.7%), in order to better understand the behavior of the models.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>EHR data have been intensively investigated for a variety of medical decision support tasks [<xref ref-type="bibr" rid="ref7">7</xref>]. These tasks include the analysis of complex patterns and prediction of major medical events (for example, diagnostic imaging and gene interactions) [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Several studies have demonstrated the successful employment of EHR data with prediction models [<xref ref-type="bibr" rid="ref10">10</xref>]. For instance, machine learning has been intensively used with EHR data in diagnosing diabetes and discovering its related patterns [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. However, we are not aware of any studies that have explored machine learning models for the prediction of current elevated HbA<sub>1c</sub> levels using EHR data from a nondiabetic population or the impact of patient longitudinal data on the effectiveness of such predictive machine learning models.</p>
        <p>Several studies have investigated the association between HbA<sub>1c</sub> levels and clinical variables using statistical models [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. A study by Rose et al [<xref ref-type="bibr" rid="ref18">18</xref>] discussed the correlation between RBS and HbA<sub>1c</sub> levels. Stanley et al [<xref ref-type="bibr" rid="ref19">19</xref>] used a linear regression model for imputation of missing HbA<sub>1c</sub> data. Their model calculates HbA<sub>1c</sub> levels for patient records with missing HbA<sub>1c</sub> values as continuous and categorical values and uses 4 predictors extracted from an EHR system—RBS, FBS, age, and gender—to calculate the level of HbA<sub>1c</sub> for a diabetic population. Simone et al [<xref ref-type="bibr" rid="ref20">20</xref>] used linear regression models to predict HbA<sub>1c</sub> levels after 6 years for nondiabetic patients using different populations.</p>
        <p>A study by Wells et al [<xref ref-type="bibr" rid="ref21">21</xref>] in 2018 was the first to focus on predicting current HbA<sub>1c</sub> elevation levels for nondiabetic patients through use of an EHR data set. Multiple logistic regression (MLR) was employed to calculate the probability of a patient having an elevated HbA<sub>1c</sub> level (≥5.7%). The data set was extracted from an EHR system used in the United States. The authors used 8 independent variables fitted to the model using restricted cubic splines with 3 knots to formulate the final equation. The performance of the MLR model was compared to that of the models used by Baan et al [<xref ref-type="bibr" rid="ref22">22</xref>] and Griffin et al [<xref ref-type="bibr" rid="ref23">23</xref>]. However, the models by Baan and Griffin aimed at predicting the onset of patients’ diabetes rather than predicting HbA<sub>1c</sub> levels for nondiabetic patients. In addition, the experimental data set used by Wells et al to train and test their model was imbalanced with 74% of the samples having normal HbA<sub>1c</sub> levels (&#60;5.7%) and only 26% of the samples having elevated HbA<sub>1c</sub> levels (≥5.7%).</p>
        <p>We performed a differentiated replication of the study by Wells et al [<xref ref-type="bibr" rid="ref21">21</xref>] using the more balanced King Abdullah International Medical Research Center (KAIMRC) data set [<xref ref-type="bibr" rid="ref24">24</xref>]. Although the significant variables identified in our replication were in general agreement with those of the original study, there were some differences in the ranking of importance for these, suggesting that such models do need to be “tuned” to the characteristics of different populations.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>To study the impact of using advanced predictive models with EHR data to predict current HbA<sub>1c</sub> levels, we employed the MLR, random forest (RF), SVM, and logistic regression (LR) models, as well as a deep learning model, MLP [<xref ref-type="bibr" rid="ref25">25</xref>]. The problem was formulated into a binary classification problem whereby the target variable, HbA<sub>1c</sub> level, was encoded as 1 when the level of HbA<sub>1c</sub> was 5.7% or more and with 0 otherwise. The results obtained from using these models were compared to those obtained from employing the model used by Wells et al with the KAIMRC data set (detailed in the Data Set subsection).</p>
        <p>The performance of the models was investigated using current visit data only and with additional longitudinal data from current and previous visits. The performance of each model was evaluated using measures commonly employed in clinical applications. For the SVM and MLP models, the relative importance of the features was also calculated using explainable machine learning techniques.</p>
      </sec>
      <sec>
        <title>Explainable Methods for Black Box Models</title>
        <p>Using black box machine learning models in health care can have adverse effects on the trust and confidence placed in their outcomes; the risk of misclassification is potentially too high for clinicians to confidently use black box models for high risk health care decisions, and not being able to interpret a model’s decision exacerbates this problem [<xref ref-type="bibr" rid="ref26">26</xref>]. Explainable methods for machine learning models allow interpretable outcomes that can expose the reasons behind the decision made by the model [<xref ref-type="bibr" rid="ref27">27</xref>]. This transparency provides both health professionals and patients with the confidence and trust in the outcome of the models. The widely used Shapley Additive Explanations (SHAP) values [<xref ref-type="bibr" rid="ref28">28</xref>] and local interpretable model-agnostic explanations (LIME) score [<xref ref-type="bibr" rid="ref29">29</xref>] techniques have therefore been used to provide a degree of transparency to our deep learning model.</p>
        <p>SHAP values are derived from Shapley values used in game theory and provide a method of calculating the contribution of each feature (variable) to the final prediction via the GradientSHAP approximation. This is achieved for each feature by comparing the prediction the model makes when the feature is present with the prediction obtained when the feature takes some baseline value [<xref ref-type="bibr" rid="ref28">28</xref>]. Consequently, the SHAP values for a given input “explain” how each feature affects the output of the model when compared to the baseline (or “default”) output of the model. We used SHAP values to interpret our black box models because they could be efficiently calculated and their use enabled a global view of the model to be constructed through the computation of SHAP values from across the whole data set.</p>
        <p>SHAP values were computed using the feature’s mean marginal contribution across different coalitions of all features. SHAP values themselves are computationally intensive to compute, and so approximation methods are commonly used when calculating the values.</p>
        <p>To ensure that the SHAP values we calculated were not too greatly affected by the approximation method used, we also computed the LIME [<xref ref-type="bibr" rid="ref29">29</xref>] scores for the models across the entire data set. LIME tries to estimate locally faithful linear explanations (ie, explanations that correspond to how the model behaves around the instance being explained) for any classifier. LIME achieves this by creating local linear classifiers that approximate the behavior of the original model in the vicinity of the data being explained. As linear models are inherently interpretable through their parameters, they can be used to generate explanations of the original model. Both SHAP and LIME have the advantage that they are model-agnostic techniques, and so we were able to apply both methods to both of our black box classification models (SVM and MLP).</p>
      </sec>
      <sec>
        <title>Data Set</title>
        <p>The data used in this study were taken from the KAIMRC data set. The data were collected from King Abdulaziz Medical City located in the central and western regions of Saudi Arabia, an area which has been ranked second in the Middle East and seventeenth in the world in diabetes prevalence by the World Health Organization (WHO) [<xref ref-type="bibr" rid="ref30">30</xref>]. According to the International Diabetes Federation, the diabetes prevalence rate in Saudi Arabia is 18.3%. Therefore, the availability of the data from this population provides considerable opportunities for research into the early prediction of diabetes.</p>
        <p>The data set contains a full history of patient details, vital signs, and lab test readings for each patient visit for the period from 2016 to the end of 2018. As the aim of this study was to identify nondiabetic patients that are at a high risk of HbA<sub>1c</sub> elevation, all patients previously diagnosed with hyperglycemia were excluded from the experimental data set. The remaining cohort formed our experimental data set and was categorized by using the American Diabetes Association’s guidelines [<xref ref-type="bibr" rid="ref31">31</xref>], in which patients with HbA<sub>1c</sub> readings of 5.7% or more are considered as being in the prediabetic range, while those with less than 5.7% are considered to be in the normal range.</p>
        <p>Most medical data sets are imbalanced [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>]. These imbalances occur when the proportion of one class of patients in the data set is greater than its counterpart class [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. However, unusually, our experimental data set was not imbalanced. Slightly over half of the patients in our experimental data set (9826/18,844, 52.14%) were found to have elevated levels of HbA<sub>1c</sub> (≥5.7%) while 47.86% (9018/18,844) of patients had normal HbA<sub>1c</sub> levels (&#60;5.7%). This can be ascribed to the high incidence of diabetes in the region from which the data set was collected [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        <p>A detailed illustration of the patients’ class distribution (HbA<sub>1c</sub> levels) by age groups and gender is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. This shows that as the age of patients increased, so did the proportion of patients who had elevated HbA<sub>1c</sub> levels. The data set also exhibited a balanced gender distribution, with 49.40% (9308/18,844) of the patients being male and 50.60% (9536/18,844) being female. However, the proportion of male patients with elevated levels of HbA<sub>1c</sub> (≥5.7%) was greater than that of the female patients. Also, female patients with normal levels of HbA<sub>1c</sub> (&#60;5.7%) made more visits than did males. <xref ref-type="table" rid="table1">Table 1</xref> shows the profile for the distribution of HbA<sub>1c</sub> elevation levels organized by gender.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>HbA<sub>1c</sub> elevation levels distributed over age range and gender in the King Abdullah International Medical Research Center (KAIMRC) data set (before sampling). HbA<sub>1c</sub>: glycated hemoglobin.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e25237_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Profile for the class distribution over gender.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="310"/>
            <col width="340"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristics</td>
                <td>HbA<sub>1c</sub><sup>a</sup> &#60;5.7%, n/N (%)</td>
                <td>HbA<sub>1c</sub> ≥5.7%, n/N (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Number of patients</bold>
                  <bold>(N=18,844)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total</td>
                <td>9018/18,844 (47.86)</td>
                <td>9826/18,844 (52.14)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>3764/9018 (41.74)</td>
                <td>5544/9826 (56.42)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>5254/9018 (58.26)</td>
                <td>4282/9826 (43.58)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Number of visits (N=157,600)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total</td>
                <td>79,607/157,600 (50.51)</td>
                <td>77,993/157,600 (49.49)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>31,620/79,607 (39.72)</td>
                <td>41,591/77,993 (53.32)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>47,987/79,607 (60.28)</td>
                <td>36,402/77,993 (46.68)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>HbA<sub>1c</sub>: glycated hemoglobin.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Feature Selection and Data Sampling</title>
        <p>Six main variables (features) were extracted from the KAIMRC EHR data set to be used in this study. These features, which were selected first for their theoretical association with hyperglycemia and second for their availability in the KAIMRC data set, were the following: age, BMI, estimated glomerular filtration rate (eGFR), RBS, total cholesterol, and non–high-density lipoprotein. The lab codes of the features used are available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> Table S1. The descriptive statistics (using the data for the current visit only for unique patients), units, and <italic>P</italic> values for the selected features are presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Descriptive statistics of the selected features from the King Abdullah International Medical Research Center (KAIMRC) data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="220"/>
            <col width="240"/>
            <col width="140"/>
            <thead>
              <tr valign="bottom">
                <td>Feature</td>
                <td>HbA<sub>1c</sub><sup>a</sup> &#60;5.7%, mean (SD)</td>
                <td>HbA<sub>1c</sub> ≥5.7%, mean (SD)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Age (years)</td>
                <td>43.94 (16.38)</td>
                <td>58.92 (15.12)</td>
                <td>&#60;0.001</td>
              </tr>
              <tr valign="top">
                <td>BMI (kg/m<sup>2</sup>)</td>
                <td>29.11 (6.75)</td>
                <td>30.90 (6.55)</td>
                <td>&#60;0.001</td>
              </tr>
              <tr valign="top">
                <td>eGFR<sup>b</sup> (ml/min/1.73 m<sup>2</sup>)</td>
                <td>100.03 (29.22)</td>
                <td>85.81 (28.239)</td>
                <td>&#60;0.001</td>
              </tr>
              <tr valign="top">
                <td>RBS<sup>c</sup> (mmol/L)</td>
                <td>5.45 (1.26)</td>
                <td>7.88 (4.19)</td>
                <td>&#60;0.001</td>
              </tr>
              <tr valign="top">
                <td>CHOL<sup>d</sup> mean (mmol/L)</td>
                <td>4.65 (1.07)</td>
                <td>4.42 (1.20)</td>
                <td>&#60;0.001</td>
              </tr>
              <tr valign="top">
                <td>non-HDL<sup>e</sup> mean (mmol/L)</td>
                <td>3.45 (1.01)</td>
                <td>3.37 (1.115)</td>
                <td>&#60;0.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>HbA<sub>1c</sub>: glycated hemoglobin.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>eGFR: estimated glomerular filtration rate.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>RBS: random blood sugar.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>CHOL: total cholesterol.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>non-HDL: non–high-density lipoprotein.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>It is very common in clinical practice that physicians may require some lab tests and vital signs to be frequently recorded. In these cases, the average value of all readings taken on a given day (the basic time interval used for this study) was used. For inpatient visits, only data for the first day were considered, and, where there were missing values, the first available values from the visit were used.</p>
        <p>For the purpose of this study, we aimed at predicting the HbA<sub>1c</sub> levels (≥5.7%) for current (last) patient visits only. Unlike the sampling approach used by Wells et al, which was based on independent hospital visits for patients (including for the same patients), the sampling approach used in this study included independent patients to ensure that only unseen patients’ data were used for testing the models. Because we aimed to identify patients with elevated levels of HbA<sub>1c</sub> from a nondiabetic population, patients previously diagnosed with diabetes were excluded. We also excluded nonadult patients and those with erroneous or missing values [<xref ref-type="bibr" rid="ref24">24</xref>]. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows the details of the tasks performed to refine the sample selection. This resulted in a reduction in the size of the experimental data set from 114,057 patients with 750,709 visits to 18,844 unique patients with 157,600 visits.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Details of the sampling approach performed on the KAIMRC data set. EHR: electronic health record; HbA<sub>1c</sub>: glycated hemoglobin; KAIMRC: King Abdullah International Medical Research Center.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e25237_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The inputs (input features space) for the models used in this study were continuous values. Values for age, eGFR, RBS and total cholesterol features were directly available in the KAIMRC data set. The values for the BMI and non–high-density lipoprotein variables were calculated from other available features using the formulae in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Input Preparation for the Models</title>
        <p>The input structure for the deep learning model was organized as a matrix, based on current and previous time-stamped patient visits. It contained the current visit data concatenated with approximated values for the selected features from all previous visits, which we refer to as the “Approximated Time Series Data”.</p>
        <p>Each patient visit was described by the selected features, represented as <italic>x</italic><sub>1</sub>, <italic>x</italic><sub>2</sub>, …, <italic>x<sub>n</sub></italic>. These features were formed as episodes based on the time-stamped values available in each visit (<italic>v<sub>i</sub></italic>).</p>
        <graphic xlink:href="medinform_v9i5e25237_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>Here, <italic>x<sub>ij</sub></italic> is the feature value at a patient visit (0 &#60; <italic>i</italic> ≤ <italic>s</italic>, 0 &#60; <italic>j</italic> ≤ <italic>n</italic>); <italic>s</italic> is the number of time series steps (the length of the input sequence); and <italic>n</italic> is the number of features for each time step, which was set to 6 as explained earlier.</p>
        <p>If the number of visits (longitudinal time series visits) for a patient was fewer than <italic>s</italic>, the input for this patient was padded out with the mean value of the available visits to compensate for the missing time series data (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> shows an example of the padding approach used). Where the number of longitudinal visits for a patient was more than <italic>s</italic>, the piecewise aggregation approximation (PAA) technique [<xref ref-type="bibr" rid="ref38">38</xref>] was applied to the data for these visits to account for all data from patient visits.</p>
        <p>PAA transforms the longitudinal time series data using <italic>s</italic> as a number of sliding windows (or segments) into a reduced number of time steps data (approximated) employing the mean value of the series falling within that window (segment) [<xref ref-type="bibr" rid="ref39">39</xref>]. We tested the models with several values for the number of sliding windows (<italic>s</italic>), and 3 was shown to be the optimal value. The formula used to calculate the approximated time-series data was as follows:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v9i5e25237_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Where <inline-graphic xlink:href="medinform_v9i5e25237_fig15.png" xlink:type="simple" mimetype="image"/> represents the approximated value for <italic>x</italic>, <italic>r</italic> is the total number of visits for a patient, and <italic>s</italic> is the reduced number of time series steps (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> shows an example of the PAA technique used).</p>
        <p>The approximated time series data forming the output of the PAA was then concatenated with the current visit data to form the final input for the deep learning model. As the MLR, RF, SVM, and LR models are not capable of handling multidimensional data (formed as matrices), the output of the PAA was reorganized for these into a single-dimensional input by vectorizing the matrix used in equation 1 as below:</p>
        <disp-formula>Input = [<italic>x</italic><sub>11</sub> <italic>x</italic><sub>12</sub> <italic>x</italic><sub>13</sub> … <italic>x<sub>sn</sub></italic>]    <bold>(3)</bold></disp-formula>
        <p>The last data preprocessing task before training the predictive models was data scaling. The experimental data set was scaled using the normalization technique that rescales the ranges of each of the features to be between 0 and 1 using minimum and maximum values of that feature.</p>
      </sec>
      <sec>
        <title>Predictive Models and Experimental Setups</title>
        <p>As a baseline comparison, we employed the MLR model used by Wells et al [<xref ref-type="bibr" rid="ref21">21</xref>], and compared the results from this with those from 4 commonly used machine learning models.</p>
        <p>The MLR model is used to create a mathematical equation that can best calculate the probability of a value by assigning weights (coefficients) to the independent variables (features) based on their importance [<xref ref-type="bibr" rid="ref40">40</xref>]. In this study we employed the same approach used by Wells et al by which the continuous features were fitted into the MLR model using restricted cubic splines technique with 3 knots. When we used the longitudinal input, the variables that caused collinearity were excluded.</p>
        <p>Random forest is an algorithm very commonly used for classification. It combines several decision trees that are generated during the training process. Each decision tree is trained using a random subset of the training data set. The final classification is then based on the majority voting results of all generated decision trees [<xref ref-type="bibr" rid="ref41">41</xref>]. The split quality function used in the employed RF model was Gini impurity, and the number of trees was set to 100.</p>
        <p>Logistic regression is commonly used to solve binary classification problems. It calculates the odds ratio of the variables and is similar to MLR but uses a binomial distribution of the dependent variable (ie, more than 1). Thus, it includes a logit function that handles different types of relationships between the dependent and independent variables [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>].</p>
        <p>Support vector machine was introduced by Vapnik [<xref ref-type="bibr" rid="ref44">44</xref>] in 1998. It can solve both classification and regression problems. It uses the training feature space to decide on the separation boundaries (hyperplanes) that best divide the training data set into regions, 1 for each class. The points closest to the hyperplanes are the support vectors. SVMs also use kernels to help enhance class separation by mapping the training features into a higher dimensional space [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. The kernel function used in the SVM model employed is a radial basis function with a value of 1 for the cost parameter (<italic>C</italic>).</p>
        <p>A multilayer perceptron, also known as a feed-forward neural network, is one of the most common deep learning approaches. It is mainly used to address supervised learning problems by learning the dependencies between the input layer (the features or variables) and output layer (the classification decision) using a fully connected hidden layer in between. The layers, including hidden ones, contain a number of neurons that are connected to the neurons of the next and previous layers via weights and nonlinear functions. MLP uses a backpropagation algorithm to update the weights and biases within the hidden layers to minimize the output error rate [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref46">46</xref>].</p>
        <p>To optimize the MLP model, fine-tuning of the structure and hyperparameters was performed and involved the number of hidden layers and neurons, activation functions, optimizers, and loss functions. The optimized structure of the MLP model used in this study contained 3 hidden layers. The numbers of neurons in the hidden layers were 48, 48, and 24, respectively. The final layer (the output layer) contained 2 neurons for the final output of the model (<italic>Y</italic>1 for normal HbA<sub>1c</sub> or <italic>Y</italic>2 for elevated HbA<sub>1c</sub>). A rectified linear unit activation function was used in the 3 hidden layers, while a sigmoid was used in the output layer. The detailed structure of the MLP model is shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. The model was trained using an Adam optimizer with mean squared error as the loss function.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>The structure used for multilayer perceptron trained with the longitudinal data. relu: rectified linear unit.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e25237_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Evaluation of Model Performance</title>
        <p>The models all employed the same data preprocessing, training, and testing techniques. The models were validated using the 10-fold cross-validation technique. The k-fold cross-validation is one of the most commonly used approximation approaches for validating the obtained results [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>]. For the MLP model, 100 epochs were used to train each fold.</p>
        <p>As our measure for evaluating and comparing the performance of the proposed models, we used the area under the receiver operating characteristic (AUC-ROC) curve, which is equal to the concordance statistic [<xref ref-type="bibr" rid="ref49">49</xref>]. We also report values for a set of measures that are commonly used in clinical applications: balanced accuracy (that calculates the recall average for each class), overall accuracy, <italic>F</italic> score, precision, and precision-recall area under the curve (PR-AUC).</p>
        <p>To determine the importance that the black box models (SVM and MLP) place upon each variable, we first computed the SHAP values and LIME scores for all samples in our data set and then calculated the average absolute SHAP value and LIME score for each predictor.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table3">Table 3</xref> shows the performance metrics obtained using the MLR, RF, SVM, LR, and MLP models with and without the longitudinal data. The results show that the models achieved competitive performance using the reported measures. The LR and MLP models trained with and without the longitudinal data achieved better performance with regards to the AUC-ROC measure than did the MLR (statistical model employed by Wells et al) or the RF and SVM models (more details about AUC-ROC and PR-AUC curve plots are presented in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). The results also show that the SVM, LR, and MLP models trained with and without the longitudinal data achieved better performance than did the MLR and RF models using the balanced accuracy measure.</p>
      <p><xref ref-type="table" rid="table3">Table 3</xref> also shows that all models, including the MLR, achieved better performance using all reported measures when they were trained with the features from patients’ longitudinal data. The MLP with longitudinal data slightly outperformed all other models with respect to most of the reported measures (AUC-ROC, balanced accuracy, accuracy, and PR-AUC).</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Classifiers performance for current glycated hemoglobin level prediction.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="20"/>
          <col width="60"/>
          <col width="0"/>
          <col width="150"/>
          <col width="0"/>
          <col width="200"/>
          <col width="0"/>
          <col width="150"/>
          <col width="0"/>
          <col width="130"/>
          <col width="0"/>
          <col width="140"/>
          <col width="0"/>
          <col width="150"/>
          <thead>
            <tr valign="bottom">
              <td colspan="3">Model</td>
              <td colspan="2">AUC-ROC<sup>a</sup>, % (SD)</td>
              <td colspan="2">Balanced accuracy, % (SD)</td>
              <td colspan="2">Accuracy, % (SD)</td>
              <td colspan="2"><italic>F</italic> score, % (SD)</td>
              <td colspan="2">Precision, % (SD)</td>
              <td>PR-AUC<sup>b</sup>, % (SD)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="14">
                <bold>MLR<sup>c</sup></bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No<sup>d</sup></td>
              <td colspan="2">81.38 (3.82)</td>
              <td colspan="2">72.74 (4.15)</td>
              <td colspan="2">73.59 (3.79)</td>
              <td colspan="2">74.91 (5.12)</td>
              <td colspan="2">73.20 (5.05)</td>
              <td colspan="2">82.14 (6.04)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes<sup>e</sup></td>
              <td colspan="2">82.45 (4.09)</td>
              <td colspan="2">73.49 (4.19)</td>
              <td colspan="2">74.30 (4.02)</td>
              <td colspan="2">75.11 (6.00)</td>
              <td colspan="2">74.36 (5.26)</td>
              <td colspan="2">83.45 (6.29)</td>
            </tr>
            <tr valign="top">
              <td colspan="14">
                <bold>RF<sup>f</sup></bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td colspan="2">80.82 (1.14)</td>
              <td colspan="2">72.57 (1.17)</td>
              <td colspan="2">72.64 (1.14)</td>
              <td colspan="2">73.97 (1.04)</td>
              <td colspan="2">73.42 (1.84)</td>
              <td colspan="2">82.03 (1.35)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td colspan="2">82.38 (1.04)</td>
              <td colspan="2">73.86 (0.98)</td>
              <td colspan="2">73.91 (0.95)</td>
              <td colspan="2">75.07 (0.86)</td>
              <td colspan="2">74.81 (1.68)</td>
              <td colspan="2">84.06 (1.17)</td>
            </tr>
            <tr valign="top">
              <td colspan="14">
                <bold>SVM<sup>g</sup></bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td colspan="2">81.05 (1.04)</td>
              <td colspan="2">73.69 (1.35)</td>
              <td colspan="2">73.88 (1.33)</td>
              <td colspan="2">75.76 (1.18)</td>
              <td colspan="2">73.42 (1.90)</td>
              <td colspan="2">80.56 (1.48)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td colspan="2">82.04 (0.89)</td>
              <td colspan="2">74.25 (1.11)</td>
              <td colspan="2">74.40 (1.08)</td>
              <td colspan="2">76.08 (0.92)</td>
              <td colspan="2">74.20 (1.65)</td>
              <td colspan="2">83.16 (1.19)</td>
            </tr>
            <tr valign="top">
              <td colspan="14">
                <bold>LR<sup>h</sup></bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td colspan="2">81.51 (1.26)</td>
              <td colspan="2">73.18 (1.10)</td>
              <td colspan="2">73.17 (1.08)</td>
              <td colspan="2">73.96 (1.03)</td>
              <td colspan="2">74.88 (1.69)</td>
              <td colspan="2">82.49 (1.46)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td colspan="2">82.59 (1.04)</td>
              <td colspan="2">74.11 (1.15)</td>
              <td colspan="2">74.05 (1.13)</td>
              <td colspan="2">74.55 (0.98)</td>
              <td colspan="2">76.31 (1.72)</td>
              <td colspan="2">84.13 (1.04)</td>
            </tr>
            <tr valign="top">
              <td colspan="14">
                <bold>MLP<sup>i</sup></bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td colspan="2">82.07 (1.06)</td>
              <td colspan="2">73.61 (1.04)</td>
              <td colspan="2">73.83 (1.03)</td>
              <td colspan="2">75.87 (1.10)</td>
              <td colspan="2">73.07 (1.62)</td>
              <td colspan="2">83.42 (1.19)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td colspan="2">83.22 (0.92)</td>
              <td colspan="2">74.45 (1.18)</td>
              <td colspan="2">74.55 (1.18)</td>
              <td colspan="2">75.99 (1.95)</td>
              <td colspan="2">74.78 (2.07)</td>
              <td colspan="2">84.85 (0.78)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>AUC-ROC: area under the receiver operating characteristic.</p>
          </fn>
          <fn id="table3fn2">
            <p><sup>b</sup>PR-AUC: precision-recall area under the curve.</p>
          </fn>
          <fn id="table3fn3">
            <p><sup>c</sup>MLR: multiple logistic regression.</p>
          </fn>
          <fn id="table3fn4">
            <p><sup>d</sup>Without longitudinal data.</p>
          </fn>
          <fn id="table3fn5">
            <p><sup>e</sup>With longitudinal data.</p>
          </fn>
          <fn id="table3fn6">
            <p><sup>f</sup>RF: random forest.</p>
          </fn>
          <fn id="table3fn7">
            <p><sup>g</sup>SVM: support vector machine.</p>
          </fn>
          <fn id="table3fn8">
            <p><sup>h</sup>LR: logistic regression.</p>
          </fn>
          <fn id="table3fn9">
            <p><sup>i</sup>MLP: multilayer perceptron.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p><xref rid="figure4" ref-type="fig">Figure 4</xref> summarizes the 10-fold performance achieved for the set of measures where the models were trained without longitudinal data, and <xref rid="figure5" ref-type="fig">Figure 5</xref> shows the performance where they were trained with the longitudinal data. Both figures show a more consistent prediction trend for RF, LR, SVM, and MLP with and without longitudinal data, as the measures for these models show a small variation between the folds. As shown in <xref rid="figure4" ref-type="fig">Figure 4</xref> and <xref rid="figure5" ref-type="fig">Figure 5</xref>, the SD values for MLR with and without longitudinal data are larger than those for the other models. This indicates that the machine learning models used can not only enhance the performance, but can also improve the classification confidence for HbA<sub>1c</sub> prediction.</p>
      <fig id="figure4" position="float">
        <label>Figure 4</label>
        <caption>
          <p>Box plot showing the detailed 10-fold performance of all models trained without longitudinal data. AUC-ROC: area under the receiver operating characteristic; LR: logistic regression; MLP: multilayer perceptron; MLR: multiple logistic regression; PR-AUC: precision-recall area under the curve; RF: random forest; SVM: support vector machine.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e25237_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure5" position="float">
        <label>Figure 5</label>
        <caption>
          <p>Box plot showing the detailed 10-fold performance of all models trained with longitudinal data. AUC-ROC: area under the receiver operating characteristic; LR: logistic regression; MLP: multilayer perceptron; MLR: multiple logistic regression; PR-AUC: precision-recall area under the curve; RF: random forest; SVM: support vector machine.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e25237_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p><xref ref-type="table" rid="table4">Table 4</xref> shows the ranked order of importance of the set of predictors used for training the models. Further details on the actual importance values for each model are provided in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref> (refer to <xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref> for more details of the MLR and LR calculator). Calculating the importance of the predictors for the MLR models using vectorized longitudinal data was not possible due to the collinearity caused by having multiple variables for BMI. The order of importance results obtained using the SHAP method for both the SVM and MLP were identical to those obtained using LIME and provided greater confidence in the explainable methods used (see <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>).</p>
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>Order of importance of predictors for the models.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="190"/>
          <col width="120"/>
          <col width="100"/>
          <col width="140"/>
          <col width="130"/>
          <col width="150"/>
          <col width="140"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Model</td>
              <td colspan="6">Importance rank</td>
            </tr>
            <tr valign="top">
              <td colspan="2">
                <break/>
              </td>
              <td>1st</td>
              <td>2nd</td>
              <td>3rd</td>
              <td>4th</td>
              <td>5th</td>
              <td>6th</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="8">
                <bold>MLR<sup>a</sup></bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No<sup>b</sup></td>
              <td>Age</td>
              <td>RBS<sup>c</sup></td>
              <td>BMI</td>
              <td>CHOL<sup>d</sup></td>
              <td>Non-HDL<sup>e</sup></td>
              <td>eGFR<sup>f</sup></td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>RF<sup>g</sup></bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>Age </td>
              <td>RBS </td>
              <td>BMI</td>
              <td>eGFR </td>
              <td>CHOL</td>
              <td>Non-HDL </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes<sup>h</sup></td>
              <td>RBS</td>
              <td>Age</td>
              <td>CHOL</td>
              <td>eGFR </td>
              <td>Non-HDL </td>
              <td>BMI</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>LR<sup>i</sup></bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>RBS </td>
              <td>Age </td>
              <td>Non-HDL </td>
              <td>CHOL </td>
              <td>BMI </td>
              <td>eGFR </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>RBS</td>
              <td>Age</td>
              <td>Non-HDL </td>
              <td>eGFR</td>
              <td>CHOL </td>
              <td>BMI</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>SVM<sup>j</sup> (SHAP<sup>k</sup> &#38; LIME<sup>l</sup>)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>Age </td>
              <td>RBS </td>
              <td>BMI </td>
              <td>Non-HDL </td>
              <td>CHOL </td>
              <td>eGFR </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>RBS</td>
              <td>Age</td>
              <td>CHOL </td>
              <td>Non-HDL </td>
              <td>BMI</td>
              <td>eGFR</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>MLP<sup>m</sup> (SHAP &#38; LIME)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>RBS</td>
              <td>Age</td>
              <td>Non-HDL </td>
              <td>CHOL</td>
              <td>BMI</td>
              <td>eGFR</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>RBS </td>
              <td>Age </td>
              <td>eGFR </td>
              <td>CHOL </td>
              <td>Non-HDL </td>
              <td>BMI</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table4fn1">
            <p><sup>a</sup>MLR: multiple logistic regression.</p>
          </fn>
          <fn id="table4fn2">
            <p><sup>b</sup>Without longitudinal data.</p>
          </fn>
          <fn id="table4fn3">
            <p><sup>c</sup>RBS: random blood sugar.</p>
          </fn>
          <fn id="table4fn4">
            <p><sup>d</sup>CHOL: total cholesterol.</p>
          </fn>
          <fn id="table4fn5">
            <p><sup>e</sup>non-HDL: non–high-density lipoprotein.</p>
          </fn>
          <fn id="table4fn6">
            <p><sup>f</sup>eGFR: estimated glomerular filtration rate.</p>
          </fn>
          <fn id="table4fn7">
            <p><sup>g</sup>RF: random forest.</p>
          </fn>
          <fn id="table4fn8">
            <p><sup>h</sup>With longitudinal data.</p>
          </fn>
          <fn id="table4fn9">
            <p><sup>i</sup>LR: logistic regression.</p>
          </fn>
          <fn id="table4fn10">
            <p><sup>j</sup>SVM: support vector machine.</p>
          </fn>
          <fn id="table4fn11">
            <p><sup>k</sup>SHAP: Shapley Additive Explanations.</p>
          </fn>
          <fn id="table4fn12">
            <p><sup>l</sup>LIME: local interpretable model-agnostic explanations.</p>
          </fn>
          <fn id="table4fn13">
            <p><sup>m</sup>MLP: multilayer perceptron.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p><xref ref-type="table" rid="table4">Table 4</xref> and the figures in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref> show that all of the models were heavily and interchangeably reliant on age and RBS when making classification decisions. The RF and SVM models, when trained with longitudinal data, ranked RBS over age. <xref rid="figure6" ref-type="fig">Figure 6</xref> and <xref rid="figure7" ref-type="fig">Figure 7</xref> highlight the importance that our best performing model, MLP, placed upon the features in our data set using SHAP and LIME, respectively. Both figures show that the RBS contributed the most to the MLP’s final prediction, while the patient’s BMI contributed the least.</p>
      <fig id="figure6" position="float">
        <label>Figure 6</label>
        <caption>
          <p>Relative importance of predictors obtained from the multilayer perceptron trained with longitudinal data using SHAP. CHOL: total cholesterol; eGFR: estimated glomerular filtration rate; non-HDL: non–high-density lipoprotein; RBS: random blood sugar; SHAP: Shapley Additive Explanations.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e25237_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure7" position="float">
        <label>Figure 7</label>
        <caption>
          <p>Relative importance of predictors obtained from the multilayer perceptron trained with longitudinal data using LIME. CHOL: total cholesterol; eGFR: estimated glomerular filtration rate; LIME: local interpretable model-agnostic explanations; non-HDL: non–high-density lipoprotein; RBS: random blood sugar.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e25237_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>For all models trained with longitudinal data, BMI was ranked lower than when the models were trained without longitudinal data. However, the importance value produced for the BMI variable from the models was still not insignificant (see the figures in <xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>). This indicates that models are able to find subtle relationships in the longitudinal data that are more relevant to the prediction than is BMI, rendering it less important.</p>
      <p>When MLP and LR models trained on the longitudinal data were used, the eGFR variable was ranked higher than total cholesterol and BMI, in contrast to when these were trained on the current visit only. None of the other models trained with the current visit only, except for RF, considered it important. Again, we ascribe this to the information that the model learns from the variations of eGFR values between a patient’s visits (longitudinal EHR data).</p>
      <p>SHAP values are calculated on the sample level. <xref rid="figure8" ref-type="fig">Figures 8</xref> and <xref rid="figure9" ref-type="fig">9</xref> illustrate the SHAP values for 2 randomly selected sample patients from our data set. These figures highlight how different inputs have different SHAP values. The patient in <xref rid="figure8" ref-type="fig">Figure 8</xref> (for whom our model correctly predicted elevated HbA<sub>1c</sub> levels of ≥5.7%) had a higher RBS value than did the patient in <xref rid="figure9" ref-type="fig">Figure 9</xref> (for whom our model correctly predicted normal HbA<sub>1c</sub> levels of &#60;5.7%). This explains why our MLP model placed much more importance on the RBS value of the patient in <xref rid="figure8" ref-type="fig">Figure 8</xref>.</p>
      <fig id="figure8" position="float">
        <label>Figure 8</label>
        <caption>
          <p>An example showing the SHAP values for a randomly selected sample with elevated glycated hemoglobin levels (≥5.7%). CHOL: total cholesterol; eGFR: estimated glomerular filtration rate; non-HDL: non–high-density lipoprotein; RBS: random blood sugar; SHAP: Shapley Additive Explanations.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e25237_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure9" position="float">
        <label>Figure 9</label>
        <caption>
          <p>An example showing the SHAP values for a randomly selected sample with normal glycated hemoglobin levels (&#60;5.7%). CHOL: total cholesterol; eGFR: estimated glomerular filtration rate; non-HDL: non–high-density lipoprotein; RBS: random blood sugar; SHAP: Shapley Additive Explanations.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e25237_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>The task of predicting HbA<sub>1c</sub> elevation risk can be challenging. <xref rid="figure10" ref-type="fig">Figure 10</xref> provides a visualization of the data points for the 2 classes (prediabetic with ≥5.7%; normal with &#60;5.7%) after the data points (for the test data) were mapped into 2 dimensions with t-distributed stochastic neighbor embedding [<xref ref-type="bibr" rid="ref50">50</xref>]. The overlap in the data points visualized in the figure demonstrates the challenge of separating the patients with and without elevated levels of HbA<sub>1c</sub> (≥5.7%) in the KAIMRC data set. We avoided intensive feature engineering techniques in the sampling approach used. However, the approaches adopted were able to achieve promising results, with an AUC-ROC of 83.22% using MLP with historical data.</p>
      <fig id="figure10" position="float">
        <label>Figure 10</label>
        <caption>
          <p>Two-dimensional visualization using t-distributed stochastic neighbor embedding for a randomly selected subset of the data. HbA<sub>1c</sub>: glycated hemoglobin.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e25237_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>In summary, all models showed promising results for predicting the current HbA<sub>1c</sub> elevation levels (≥5.7%) with EHR data. The results emphasize that the HbA<sub>1c</sub> predictive models can exhibit more learnability when they are trained with the longitudinal patient data observations typically available from EHR systems.</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Strengths and Limitations</title>
        <p>EHR systems were adopted for the purpose of improving health care outcomes and were not originally intended for research purposes [<xref ref-type="bibr" rid="ref19">19</xref>]. Patient data stored in EHR systems can be obtained at irregular intervals, as lab instructions are carried out with different frequencies based on the physician's decisions and a patient’s visit patterns. It is very common that medical data extracted from EHR systems suffer from problems such as irregularity, incompleteness, and noisy and imbalanced data [<xref ref-type="bibr" rid="ref13">13</xref>]. These can be challenging obstacles for any technology used for predictive analytics.</p>
        <p>In our study, the sampling approach used did not affect the balanced nature of the data set used. As shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, there were 56,185 unique patients present before removal of the records with 1 or more missing values. The number of unique patients with elevated HbA<sub>1c</sub> levels (≥5.7%) before removal of the incomplete records was 27,354, resulting in a retention of 48.68% (27,354/56,185). The number of unique patients with normal HbA<sub>1c</sub> levels was 28,831, resulting in a retention of 51.32% (28,831/56,185). We would argue that the absence or the presence of the HbA<sub>1c</sub> readings is not random, as the sample was collected from the population of Saudi Arabia and thus the likelihood of a patient taking an HbA<sub>1c</sub> test is high because of the prevalence of diabetes in this country [<xref ref-type="bibr" rid="ref51">51</xref>]. This may affect the reproducibility of this work using different populations from different countries, especially those with lower rates of diabetes.</p>
        <p>It is hoped that these outcomes will encourage further investigation into the predictability of current HbA<sub>1c</sub> levels (≥5.7%) using more of the readings normally provided in EHR data. For example, other important readings such as FBS and triglycerides have shown clinical correlations with diabetes [<xref ref-type="bibr" rid="ref52">52</xref>]. In addition, our data set contained only 3 years of patient data, which limits the number of patient visits recorded. <xref rid="figure11" ref-type="fig">Figure 11</xref> shows the number of visits made by patients from 2016 to 2018, while <xref rid="figure12" ref-type="fig">Figure 12</xref> details the number of visits made by patients (after removal of the outliers) over HbA<sub>1c</sub> levels. Both figures show that the majority of the patients have made relatively few visits: 52% (8713/16,818) of the patients made 4 visits or fewer over the 3 years (1.3 visits per year). This also justifies the size of the sliding window (<italic>s</italic>=3) as the optimal input size for the models used. However, we hypothesize that the longitudinal behavior of the features used can be enriched by including more values obtained over longer periods. Therefore, incorporating more features and their longitudinal behavior over longer periods into the models used in this study would likely improve the prediction performance of our chosen models.</p>
        <fig id="figure11" position="float">
          <label>Figure 11</label>
          <caption>
            <p>Histogram showing the trend in the number of visits made by patients.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e25237_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure12" position="float">
          <label>Figure 12</label>
          <caption>
            <p>The details for the number of visits made over number of patients. HbA<sub>1c</sub>: glycated hemoglobin.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e25237_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Variations in the data or model produce slightly different attribution values. However, due to the critical nature of many health care applications, it is always important to verify that the models make “sensible” predictions. Without the use of SHAP/LIME, this would be hard to verify for any nonlinear model. Although it is possible to see that the models have high performance, we would be unable to verify that a model is not making spurious correlations. Furthermore, through the use of SHAP, we can verify that MLPs trained on the longitudinal data are learning to use the extra information contained in the longitudinal data (as indicated by the higher importance of eGFR), allowing us to pinpoint the reason these models gain higher performance.</p>
        <p>To investigate the effect of temporal dependencies in the data, this study investigated the use of other deep learning models along with the MLP, including long short-term memory (LSTM) and bidirectional LSTM [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref53">53</xref>] for HbA<sub>1c</sub> prediction. <xref ref-type="table" rid="table5">Table 5</xref> reports the results of using these models. The MLP model achieved similar performance to the LSTM and bidirectional LSTM models according to all reported measures. This suggests that directly modeling the temporal dynamics in the data is not very helpful. This could be due to the short lengths of the time series or a too-weak temporal dependency.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Performance of the LSTM and BiLSTM classifiers trained with longitudinal data for the prediction of current HbA<sub>1c</sub> levels.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="170"/>
            <col width="200"/>
            <col width="150"/>
            <col width="120"/>
            <col width="140"/>
            <col width="140"/>
            <thead>
              <tr valign="bottom">
                <td>Model</td>
                <td>AUC-ROC<sup>a</sup>, % (SD)</td>
                <td>Balanced Accuracy, % (SD)</td>
                <td>Accuracy, % (SD)</td>
                <td><italic>F</italic> score, % (SD)</td>
                <td>Precision, % (SD)</td>
                <td>PR-AUC<sup>b</sup>, % (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>LSTM<sup>c</sup></td>
                <td>83.26% (0.91)</td>
                <td>74.17% (1.05)</td>
                <td>74.59% (1.23)</td>
                <td>75.64% (1.50)</td>
                <td>74.59% (3.26)</td>
                <td>81.88% (0.95)</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM<sup>d</sup></td>
                <td>83.16% (0.87)</td>
                <td>74.21% (1.24)</td>
                <td>74.30% (1.15)</td>
                <td>75.46% (1.39)</td>
                <td>75.19% (2.36)</td>
                <td>84.75% (0.75)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>AUC-ROC: area under the receiver operating characteristic.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>PR-AUC: precision-recall area under the curve.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>LSTM: long short-term memory.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>BiLSTM: bidirectional LSTM.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Generalizing our findings using other data sets is challenging because of the accessibility and privacy restrictions that apply to medical data sets. For this reason, and because of the lack of similar studies that have used machine learning for HbA<sub>1c</sub> prediction with EHR data, comparing the performance achieved by the models outlined in this study with those developed by other researchers will require the availability of alternative anonymized data sets.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We believe that this study is the first to investigate the performance of machine learning models used with EHR data for predicting current HbA<sub>1c</sub> elevation risk (≥5.7%) for nondiabetic patients. It is also the first to investigate employing the longitudinal data that are normally stored on EHR systems to enhance the prediction of HbA<sub>1c</sub> elevation levels. Our findings show that the MLP model achieves better results when a patient’s longitudinal data are combined with current visit data, and the use of longitudinal data also affects the relative importance for the predictors used.</p>
        <p>As this work formed a continuation of previous work [<xref ref-type="bibr" rid="ref24">24</xref>], we avoided changing the sampling approach used. However, studying the impact of applying different sampling approaches could be valuable to explore in future work as would the use of a larger data set with more variables and the recording of longitudinal behavior over longer periods.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Lab test and diagnostic codes.</p>
        <media xlink:href="medinform_v9i5e25237_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 93 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Formulae for the calculated variables.</p>
        <media xlink:href="medinform_v9i5e25237_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 77 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>An example of the padding approach used.</p>
        <media xlink:href="medinform_v9i5e25237_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 169 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>An example of the PAA technique.</p>
        <media xlink:href="medinform_v9i5e25237_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 240 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>AUC-ROC and PR-AUC curves for the models (with 10 folds) trained with longitudinal data.</p>
        <media xlink:href="medinform_v9i5e25237_app5.pdf" xlink:title="PDF File  (Adobe PDF File), 1011 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Variable relative importance charts for the models.</p>
        <media xlink:href="medinform_v9i5e25237_app6.pdf" xlink:title="PDF File  (Adobe PDF File), 578 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Multiple logistic regression (MLR) and logistic regression (LR) details.</p>
        <media xlink:href="medinform_v9i5e25237_app7.pdf" xlink:title="PDF File  (Adobe PDF File), 157 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC-ROC</term>
          <def>
            <p>area under the receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">eGFR</term>
          <def>
            <p>estimated glomerular filtration rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health records</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FBS</term>
          <def>
            <p>fasting blood sugar</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HbA<sub>1c</sub></term>
          <def>
            <p>glycated hemoglobin</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">KAIMRC</term>
          <def>
            <p>King Abdullah International Medical Research Center</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LIME</term>
          <def>
            <p>local interpretable model-agnostic explanations</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">MLP</term>
          <def>
            <p>multilayer perceptron</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MLR</term>
          <def>
            <p>multiple logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">PAA</term>
          <def>
            <p>piecewise aggregate approximation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PR-AUC</term>
          <def>
            <p>precision-recall area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">RBS</term>
          <def>
            <p>random blood sugar</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">SHAP</term>
          <def>
            <p>Shapley Additive Explanations</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">T2DM</term>
          <def>
            <p>type-2 diabetes mellitus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to acknowledge the contribution of the KAIMRC in providing the data set under the approved projects: Diabetes Early Warning System (research protocol no. SP14/042), Finding the Common Related Diseases with Diabetes using Data Mining Association Techniques (research protocol no. SP15/064), and extension project (no. RYD-17-417780-187503) to collect the newest data set. The authors would also like to thank Cievert Ltd and the European Regional Development Fund for sponsoring this work.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>ZA was responsible for implementing and building predictive models. ZA, MW, DB, and NAM were responsible for the design of the study and for writing the manuscript. ZA, MW, DB, and NAM were responsible for designing and validating the models. MW and ZA were responsible for analyzing the explainability of the machine learning model. ZA, AA, and RA were responsible for extracting and describing the data set. All authors participated in reviewing the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Larsen</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Hørder</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mogensen</surname>
              <given-names>EF</given-names>
            </name>
          </person-group>
          <article-title>Effect of long-term monitoring of glycosylated hemoglobin levels in insulin-dependent diabetes mellitus</article-title>
          <source>New England Journal of Medicine</source>
          <year>1990</year>
          <month>10</month>
          <day>11</day>
          <volume>323</volume>
          <issue>15</issue>
          <fpage>1021</fpage>
          <lpage>1025</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJM199010113231503</pub-id>
          <pub-id pub-id-type="medline">2215560</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Rifai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Buring</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Ridker</surname>
              <given-names>PM</given-names>
            </name>
          </person-group>
          <article-title>Hemoglobin A1c predicts diabetes but not cardiovascular disease in nondiabetic women</article-title>
          <source>The American Journal of Medicine</source>
          <year>2007</year>
          <month>08</month>
          <volume>120</volume>
          <issue>8</issue>
          <fpage>720</fpage>
          <lpage>727</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amjmed.2007.03.022</pub-id>
          <pub-id pub-id-type="pmcid">PMC2585540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ackermann</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>DF</given-names>
            </name>
            <name name-style="western">
              <surname>Gregg</surname>
              <given-names>EW</given-names>
            </name>
          </person-group>
          <article-title>Identifying adults at high risk for diabetes and cardiovascular disease using hemoglobin A1c: National Health and Nutrition Examination Survey 2005-2006</article-title>
          <source>American Journal of Preventive Medicine</source>
          <year>2011</year>
          <month>1</month>
          <volume>40</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2010.09.022</pub-id>
          <pub-id pub-id-type="medline">21146762</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>World Health Organization</collab>
          </person-group>
          <article-title>Use of glycated haemoglobin (HbA1c) in diagnosis of diabetes mellitus: abbreviated report of a WHO consultation</article-title>
          <source>World Health Organization</source>
          <year>2011</year>
          <fpage>a</fpage>
          <pub-id pub-id-type="medline">26158184</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khaw</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wareham</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bingham</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Luben</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Welch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Association of hemoglobin A1c with cardiovascular disease and mortality in adults: the European prospective investigation into cancer in Norfolk</article-title>
          <source>Ann Intern Med</source>
          <year>2004</year>
          <month>09</month>
          <day>21</day>
          <volume>141</volume>
          <issue>6</issue>
          <fpage>413</fpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-141-6-200409210-00006</pub-id>
          <pub-id pub-id-type="medline">15381514</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>American Diabetes Association</collab>
          </person-group>
          <article-title>Classification and diagnosis of diabetes: standards of medical care in diabetes—2018</article-title>
          <source>Dia Care</source>
          <year>2017</year>
          <month>12</month>
          <day>08</day>
          <volume>41</volume>
          <issue>Supplement 1</issue>
          <fpage>S13</fpage>
          <lpage>S27</lpage>
          <pub-id pub-id-type="doi">10.2337/dc18-s002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coorevits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sundgren</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>GO</given-names>
            </name>
            <name name-style="western">
              <surname>Bahr</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Claerhout</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Daniel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dugas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dupont</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Singleton</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>De Moor</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kalra</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Electronic health records: new opportunities for clinical research</article-title>
          <source>Journal of internal medicine</source>
          <year>2013</year>
          <month>10</month>
          <day>18</day>
          <volume>274</volume>
          <issue>6</issue>
          <fpage>547</fpage>
          <lpage>560</lpage>
          <pub-id pub-id-type="doi">10.1111/joim.12119</pub-id>
          <pub-id pub-id-type="medline">23952476</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McKinney</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Reif</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ritchie</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Machine learning for detecting gene-gene interactions: a review</article-title>
          <source>Appl Bioinformatics</source>
          <year>2006</year>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>77</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/16722772"/>
          </comment>
          <pub-id pub-id-type="doi">10.2165/00822942-200605020-00002</pub-id>
          <pub-id pub-id-type="medline">16722772</pub-id>
          <pub-id pub-id-type="pii">522</pub-id>
          <pub-id pub-id-type="pmcid">PMC3244050</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldenberg</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Nir</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Salcudean</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>A new era: artificial intelligence and machine learning in prostate cancer</article-title>
          <source>Nature Reviews Urology</source>
          <year>2019</year>
          <month>5</month>
          <day>15</day>
          <volume>16</volume>
          <issue>7</issue>
          <fpage>391</fpage>
          <lpage>403</lpage>
          <pub-id pub-id-type="doi">10.1038/s41585-019-0193-3</pub-id>
          <pub-id pub-id-type="medline">31092914</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Botsis</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hartvigsen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Secondary use of EHR: data quality issues and informatics opportunities</article-title>
          <source>Summit Transl Bioinform</source>
          <year>2010</year>
          <fpage>1</fpage>
          <pub-id pub-id-type="medline">21347133</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perveen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shahbaz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Keshavjee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Guergachi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Prognostic modeling and prevention of diabetes using machine learning technique</article-title>
          <source>Scientific Reports</source>
          <year>2019</year>
          <month>09</month>
          <day>24</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1038/s41598-019-49563-6</pub-id>
          <pub-id pub-id-type="medline">31551457</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Esteban</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rodríguez Tablado</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Peper</surname>
              <given-names>FE</given-names>
            </name>
            <name name-style="western">
              <surname>Mahumud</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Ricci</surname>
              <given-names>RI</given-names>
            </name>
            <name name-style="western">
              <surname>Kopitowski</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Terrasa</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of various phenotyping algorithms for Diabetes Mellitus using data from electronic health records</article-title>
          <source>Computer Methods and Programs in Biomedicine</source>
          <year>2017</year>
          <month>12</month>
          <volume>152</volume>
          <fpage>53</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2017.09.009</pub-id>
          <pub-id pub-id-type="medline">29054261</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kidd</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Deep patient: an unsupervised representation to predict the future of patients from the electronic health records</article-title>
          <source>Scientific Reports</source>
          <year>2016</year>
          <month>5</month>
          <day>17</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.1038/srep26094</pub-id>
          <pub-id pub-id-type="medline">27185194</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hippisley-Cox</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Coupland</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Robson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sheikh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brindle</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Predicting risk of type 2 diabetes in England and Wales: prospective derivation and validation of QDScore</article-title>
          <source>BMJ</source>
          <year>2009</year>
          <month>03</month>
          <day>17</day>
          <volume>338</volume>
          <issue>mar17 2</issue>
          <fpage>b880</fpage>
          <lpage>b880</lpage>
          <pub-id pub-id-type="doi">10.1136/bmj.b880</pub-id>
          <pub-id pub-id-type="medline">19297312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhassan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>McGough</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alshammari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Daghstani</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Budgen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Al Moubayed</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Type-2 diabetes mellitus diagnosis from time series clinical data using deep learning models</article-title>
          <year>2018</year>
          <conf-name>International Conference on Artificial Neural Networks</conf-name>
          <conf-date>2018 Oct 4-7</conf-date>
          <conf-loc>Greece</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-01424-7_46</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCarter</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hempe</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Chalew</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Mean blood glucose and biological variation have greater influence on HbA1c levels than glucose instability: an analysis of data from the Diabetes Control and Complications Trial</article-title>
          <source>Diabetes Care</source>
          <year>2006</year>
          <month>01</month>
          <day>27</day>
          <volume>29</volume>
          <issue>2</issue>
          <fpage>352</fpage>
          <lpage>355</lpage>
          <pub-id pub-id-type="doi">10.2337/diacare.29.02.06.dc05-1594</pub-id>
          <pub-id pub-id-type="medline">16443886</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nathan</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Kuenen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Borg</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schoenfeld</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Heine</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Translating the A1C assay into estimated average glucose values</article-title>
          <source>Diabetes Care</source>
          <year>2008</year>
          <month>06</month>
          <day>07</day>
          <volume>31</volume>
          <issue>8</issue>
          <fpage>1473</fpage>
          <lpage>1478</lpage>
          <pub-id pub-id-type="doi">10.2337/dc08-0545</pub-id>
          <pub-id pub-id-type="medline">18540046</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ketchell</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Clinical inquiries. Does daily monitoring of blood glucose predict hemoglobin A1c levels?</article-title>
          <source>J Fam Pract</source>
          <year>2003</year>
          <fpage>1</fpage>
          <pub-id pub-id-type="medline">12791231</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schroeder</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Shetterly</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Goodrich</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>O’Connor</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Steiner</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Schmittdiel</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Desai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Neugebauer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Butler</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Kirchner</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Raebel</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of hemoglobin A1c imputation using fasting plasma glucose in diabetes research using electronic health records data</article-title>
          <source>Stat Optim Inf Comput</source>
          <year>2014</year>
          <month>06</month>
          <day>01</day>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>93</fpage>
          <lpage>104</lpage>
          <pub-id pub-id-type="doi">10.19139/68</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rauh</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Heymans</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Koopman</surname>
              <given-names>ADM</given-names>
            </name>
            <name name-style="western">
              <surname>Nijpels</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stehouwer</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Thorand</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rathmann</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Meisinger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>de las Heras Gala</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Glümer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pedersen</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Cederberg</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kuusisto</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laakso</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Franks</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Rutters</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Dekker</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Predicting glycated hemoglobin levels in the non-diabetic general population: Development and validation of the DIRECT-DETECT prediction model - a DIRECT study</article-title>
          <source>PLoS ONE</source>
          <year>2017</year>
          <month>2</month>
          <day>10</day>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>e0171816</fpage>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0171816</pub-id>
          <pub-id pub-id-type="medline">28187151</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wells</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lenoir</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz-Garelli</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Futrell</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lockerman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pantalone</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Kattan</surname>
              <given-names>MW</given-names>
            </name>
          </person-group>
          <article-title>Predicting current glycated hemoglobin values in adults: development of an algorithm from the electronic health record</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>10</month>
          <day>22</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e10780</fpage>
          <pub-id pub-id-type="doi">10.2196/10780</pub-id>
          <pub-id pub-id-type="medline">30348631</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baan</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Ruige</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Stolk</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Witteman</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Dekker</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Heine</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Feskens</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Performance of a predictive model to identify undiagnosed diabetes in a health care setting</article-title>
          <source>Diabetes Care</source>
          <year>1999</year>
          <month>02</month>
          <day>01</day>
          <volume>22</volume>
          <issue>2</issue>
          <fpage>213</fpage>
          <lpage>219</lpage>
          <pub-id pub-id-type="doi">10.2337/diacare.22.2.213</pub-id>
          <pub-id pub-id-type="medline">10333936</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Little</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Hales</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Kinmonth</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Wareham</surname>
              <given-names>NJ</given-names>
            </name>
          </person-group>
          <article-title>Diabetes risk score: towards earlier detection of Type 2 diabetes in general practice</article-title>
          <source>Diabetes/Metabolism Research and Reviews</source>
          <year>2000</year>
          <month>05</month>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>164</fpage>
          <lpage>171</lpage>
          <pub-id pub-id-type="doi">10.1002/1520-7560(200005/06)16:3&lt;164::aid-dmrr103&gt;3.0.co;2-r</pub-id>
          <pub-id pub-id-type="medline">10867715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhassan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Budgen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Alshammari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Al Moubayed</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Predicting current glycated hemoglobin levels in adults from electronic health records: validation of multiple logistic regression algorithm</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>7</month>
          <day>3</day>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>e18963</fpage>
          <pub-id pub-id-type="doi">10.2196/18963</pub-id>
          <pub-id pub-id-type="medline">32618575</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>LeCun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>05</month>
          <day>27</day>
          <volume>521</volume>
          <issue>7553</issue>
          <fpage>436</fpage>
          <lpage>444</lpage>
          <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eckert</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Teredesai</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Interpretable machine learning in healthcare</article-title>
          <year>2018</year>
          <conf-name>Proceedings of the ACM international conference on bioinformatics, computational biology, and health informatics</conf-name>
          <conf-date>2018 Aug 29-Sept 1</conf-date>
          <conf-loc>Washington DC</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3233547.3233667</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipton</surname>
              <given-names>ZC</given-names>
            </name>
          </person-group>
          <article-title>The Mythos of Model Interpretability: In machine learning, the concept of interpretability is both important and slippery</article-title>
          <source>ACM Queue</source>
          <year>2018</year>
          <month>06</month>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>31</fpage>
          <lpage>57</lpage>
          <pub-id pub-id-type="doi">10.1145/3236386.3241340</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lundberg</surname>
              <given-names>Scott</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Su-In</given-names>
            </name>
          </person-group>
          <article-title>A unified approach to interpreting model predictions</article-title>
          <year>2017</year>
          <conf-name>Advances in neural information processing systems</conf-name>
          <conf-date>2017 Dec 4-9</conf-date>
          <conf-loc>Long Beach</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ribeiro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>"Why should I trust you?": explaining the predictions of any classifier</article-title>
          <year>2016</year>
          <conf-name>Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining</conf-name>
          <conf-date>2016 Aug 13-16</conf-date>
          <conf-loc>San Francisco</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2939672.2939778</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdulaziz Al Dawish</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alwin Robert</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Braham</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Abdallah Al Hayek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al Saeed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed Ahmed</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sulaiman Al Sabaan</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Diabetes mellitus in Saudi Arabia: a review of the recent literature</article-title>
          <source>Current Diabetes Reviews</source>
          <year>2016</year>
          <month>10</month>
          <day>26</day>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>359</fpage>
          <lpage>368</lpage>
          <pub-id pub-id-type="doi">10.2174/1573399811666150724095130</pub-id>
          <pub-id pub-id-type="medline">26206092</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>Understanding A1C</article-title>
          <source>American Diabetes Association</source>
          <access-date>2020-11-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.diabetes.org/a1c">https://www.diabetes.org/a1c</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>GEAPA</given-names>
            </name>
            <name name-style="western">
              <surname>Prati</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Monard</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>A study of the behavior of several methods for balancing machine learning training data</article-title>
          <source>ACM SIGKDD Explorations Newsletter</source>
          <year>2004</year>
          <month>06</month>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>20</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.1145/1007730.1007735</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Imbalanced biomedical data classification using self-adaptive multilayer ELM combined with dynamic GAN</article-title>
          <source>BioMedical Engineering OnLine</source>
          <year>2018</year>
          <month>12</month>
          <day>4</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1186/s12938-018-0604-3</pub-id>
          <pub-id pub-id-type="medline">30514298</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>DN</given-names>
            </name>
          </person-group>
          <article-title>Addressing the class imbalance problem in medical datasets</article-title>
          <source>International Journal of Machine Learning and Computing</source>
          <year>2013</year>
          <fpage>224</fpage>
          <lpage>228</lpage>
          <pub-id pub-id-type="doi">10.7763/ijmlc.2013.v3.307</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Longadge</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dongre</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Malik</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Class imbalance problem in data mining review</article-title>
          <source>International Journal of Computer Science and Network</source>
          <year>2013</year>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>7</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhassan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Budgen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Alshammari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Daghstani</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>McGough</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al Moubayed</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Stacked denoising autoencoders for mortality risk prediction using imbalanced clinical data</article-title>
          <year>2018</year>
          <conf-name>International Conference on Machine Learning and Applications (ICMLA)</conf-name>
          <conf-date>2018 Dec 17</conf-date>
          <conf-loc>Orlando</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icmla.2018.00087</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alqurashi</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Aljabri</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Bokhari</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of diabetes mellitus in a Saudi community</article-title>
          <source>Annals of Saudi Medicine</source>
          <year>2011</year>
          <month>01</month>
          <volume>31</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.4103/0256-4947.75773</pub-id>
          <pub-id pub-id-type="medline">21245594</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Keogh</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chakrabarti</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pazzani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrotra</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Locally adaptive dimensionality reduction for indexing large time series databases</article-title>
          <year>2001</year>
          <conf-name>The 2001 ACM SIGMOD International Conference on Management of Data</conf-name>
          <conf-date>2001 May 21-25</conf-date>
          <conf-loc>Santa Barbara</conf-loc>
          <pub-id pub-id-type="doi">10.1145/375663.375680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Papapetrou</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Asker</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Boström</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Learning from heterogeneous temporal data in electronic health records</article-title>
          <source>Journal of Biomedical Informatics</source>
          <year>2017</year>
          <month>01</month>
          <volume>65</volume>
          <fpage>105</fpage>
          <lpage>119</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2016.11.006</pub-id>
          <pub-id pub-id-type="medline">27919732</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Handbook of Biological Statistics</source>
          <year>2009</year>
          <publisher-loc>Baltimore, MD</publisher-loc>
          <publisher-name>Sparky House Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Machine Learning</source>
          <year>2001</year>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rawlings</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pantula</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dickey</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Applied Regression Analysis</source>
          <year>2001</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>a</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sperandei</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Understanding logistic regression analysis</article-title>
          <source>Biochemia Medica</source>
          <year>2014</year>
          <fpage>12</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.11613/bm.2014.003</pub-id>
          <pub-id pub-id-type="medline">24627710</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <source>The Nature of Statistical Learning Theory</source>
          <year>2013</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noble</surname>
              <given-names>WS</given-names>
            </name>
          </person-group>
          <article-title>What is a support vector machine?</article-title>
          <source>Nature Biotechnology</source>
          <year>2006</year>
          <month>12</month>
          <volume>24</volume>
          <issue>12</issue>
          <fpage>1565</fpage>
          <lpage>1567</lpage>
          <pub-id pub-id-type="doi">10.1038/nbt1206-1565</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dorling</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Artificial neural networks (the multilayer perceptron)—a review of applications in the atmospheric sciences</article-title>
          <source>Atmospheric Environment</source>
          <year>1998</year>
          <month>08</month>
          <volume>32</volume>
          <issue>14-15</issue>
          <fpage>2627</fpage>
          <lpage>2636</lpage>
          <pub-id pub-id-type="doi">10.1016/s1352-2310(97)00447-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Deep Learning</source>
          <year>2016</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bobadilla</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ortega</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hernando</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gutiérrez</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Recommender systems survey</article-title>
          <source>Knowledge-Based Systems</source>
          <year>2013</year>
          <month>07</month>
          <volume>46</volume>
          <fpage>109</fpage>
          <lpage>132</lpage>
          <pub-id pub-id-type="doi">10.1016/j.knosys.2013.03.012</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Austin</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
          </person-group>
          <article-title>Interpreting the concordance statistic of a logistic regression model: relation to the variance and odds ratio of a continuous explanatory variable</article-title>
          <source>BMC Medical Research Methodology</source>
          <year>2012</year>
          <month>06</month>
          <day>20</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>82</fpage>
          <pub-id pub-id-type="doi">10.1186/1471-2288-12-82</pub-id>
          <pub-id pub-id-type="medline">22716998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Maaten</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Visualizing data using t-SNE</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2008</year>
          <month>11</month>
          <volume>9</volume>
          <fpage>2579</fpage>
          <lpage>2605</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Zahrani</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Aldiab</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aldossari</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Ghamdi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Batais</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Javad</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of prediabetes, diabetes and its predictors among females in Alkharj, Saudi Arabia: a cross-sectional study</article-title>
          <source>Annals of Global Health</source>
          <year>2019</year>
          <volume>85</volume>
          <issue>1</issue>
          <fpage>A</fpage>
          <pub-id pub-id-type="doi">10.5334/aogh.2467</pub-id>
          <pub-id pub-id-type="medline">31348623</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naqvi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Naveed</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Raj</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Correlation between glycated hemoglobin and triglyceride level in type 2 diabetes mellitus</article-title>
          <source>Cureus</source>
          <year>2017</year>
          <volume>9</volume>
          <issue>6</issue>
          <fpage>e1347</fpage>
          <pub-id pub-id-type="doi">10.7759/cureus.1347</pub-id>
          <pub-id pub-id-type="medline">28713663</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schuster</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Paliwal</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Bidirectional recurrent neural networks</article-title>
          <source>IEEE Transactions on Signal Processing</source>
          <year>1997</year>
          <volume>45</volume>
          <issue>11</issue>
          <fpage>2673</fpage>
          <lpage>2681</lpage>
          <pub-id pub-id-type="doi">10.1109/78.650093</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
