<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i8e29807</article-id>
      <article-id pub-id-type="pmid">34459743</article-id>
      <article-id pub-id-type="doi">10.2196/29807</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Patient-Level Cancer Prediction Models From a Nationwide Patient Cohort: Model Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Cheng</surname>
            <given-names>Xi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hardikar</surname>
            <given-names>Navneetha</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Eunsaem</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9606-3230</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Jung</surname>
            <given-names>Se Young</given-names>
          </name>
          <degrees>MD, MPH</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9946-8807</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Hwang</surname>
            <given-names>Hyung Ju</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Mathematics</institution>
            <institution>Pohang University of Science and Technology</institution>
            <addr-line>77 Cheongam-ro</addr-line>
            <addr-line>Nam-gu</addr-line>
            <addr-line>Pohang-si, 37673</addr-line>
            <country>Republic of Korea</country>
            <fax>82 054 279 2799</fax>
            <phone>82 054 279 2056</phone>
            <email>hjhwang@postech.ac.kr</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3678-2687</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Jung</surname>
            <given-names>Jaewoo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6340-3275</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Mathematics</institution>
        <institution>Pohang University of Science and Technology</institution>
        <addr-line>Pohang-si</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Office of eHealth Research and Businesses</institution>
        <institution>Seoul National University Bundang Hospital</institution>
        <addr-line>Seongnam-si</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>AMSquare Corporation</institution>
        <addr-line>Pohang-si</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hyung Ju Hwang <email>hjhwang@postech.ac.kr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>30</day>
        <month>8</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>8</issue>
      <elocation-id>e29807</elocation-id>
      <history>
        <date date-type="received">
          <day>21</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>7</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>26</day>
          <month>7</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Eunsaem Lee, Se Young Jung, Hyung Ju Hwang, Jaewoo Jung. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 30.08.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/8/e29807" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Nationwide population-based cohorts provide a new opportunity to build automated risk prediction models at the patient level, and claim data are one of the more useful resources to this end. To avoid unnecessary diagnostic intervention after cancer screening tests, patient-level prediction models should be developed.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to develop cancer prediction models using nationwide claim databases with machine learning algorithms, which are explainable and easily applicable in real-world environments.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>As source data, we used the Korean National Insurance System Database. Every Korean aged ≥40 years undergoes a national health checkup every 2 years. We gathered all variables from the database including demographic information, basic laboratory values, anthropometric values, and previous medical history. We applied conventional logistic regression methods, light gradient boosting methods, neural networks, survival analysis, and one-class embedding classifier methods to effectively analyze high dimension data based on deep learning–based anomaly detection. Performance was measured with area under the curve and area under precision recall curve. We validated our models externally with a health checkup database from a tertiary hospital.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The one-class embedding classifier model received the highest area under the curve scores with values of 0.868, 0.849, 0.798, 0.746, 0.800, 0.749, and 0.790 for liver, lung, colorectal, pancreatic, gastric, breast, and cervical cancers, respectively. For area under precision recall curve, the light gradient boosting models had the highest score with values of 0.383, 0.401, 0.387, 0.300, 0.385, 0.357, and 0.296 for liver, lung, colorectal, pancreatic, gastric, breast, and cervical cancers, respectively.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our results show that it is possible to easily develop applicable cancer prediction models with nationwide claim data using machine learning. The 7 models showed acceptable performances and explainability, and thus can be distributed easily in real-world environments.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>prediction</kwd>
        <kwd>model</kwd>
        <kwd>claim data</kwd>
        <kwd>cancer</kwd>
        <kwd>machine learning</kwd>
        <kwd>development</kwd>
        <kwd>cohort</kwd>
        <kwd>validation</kwd>
        <kwd>database</kwd>
        <kwd>algorithm</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Cancer is a major cause of death, accounting for nearly 10 million deaths worldwide in 2020 [<xref ref-type="bibr" rid="ref1">1</xref>]. It is a preventable disease requiring major lifestyle modifications [<xref ref-type="bibr" rid="ref2">2</xref>], for which screening is important because it can help health care professionals with early detection and treatment of several types of cancer before they become aggravated [<xref ref-type="bibr" rid="ref3">3</xref>]. In the early stages, cancer is normally indolent and symptomless. Thus, nationwide cancer screening programs for the general population have been adopted in many countries [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. A national cancer control program (NCCP) framework, a public health program designed to mitigate the number of cancer cases and deaths and improve quality of life of patients, was proposed by the World Health Organization [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. In South Korea, the NCCP was designed in 1996 and implemented in 1999 to provide free screening services for low-income Medical Aid patients. Beginning in 2000, the NCCP has expanded its target population to include all National Health Insurance (NHI) recipients. Since that time, the survival rate of cancer patients has continued to improve. According to cancer registration statistics in 2013, the relative survival rate of cancer patients has increased to 70.3% [<xref ref-type="bibr" rid="ref10">10</xref>]. For 7 major cancers, namely, stomach, colorectal, breast, lung, cervical, pancreas, and liver cancer, every NHI beneficiary receives cancer screening tests mainly based on his or her age and gender. For instance, everyone ≥40 years old is examined by upper gastrointestinography or gastrointestinal endoscopy every 2 years to screen for stomach cancer. 
However, concerns have been raised about this one-size-fits-all cancer screening program because every procedure for cancer screening has its own risks for false-positive cases. For instance, false-positive cases of mammograms for screening breast cancer have resulted in many unnecessary invasive breast excisional biopsies, which reduce the quality of life in women [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Thus, personalized cancer screening protocols based on patients’ individual risks have been needed since the NCCP was introduced [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. The National Health Insurance System (NHIS) has collected health checkup data since 2003 under a structured data format and made it available for researchers [<xref ref-type="bibr" rid="ref15">15</xref>]. There are two types of NHIS cohort data: a 1-million-person cohort sampled randomly from all NHI beneficiaries reflecting general characteristics of the entire South Korean population and a 500-thousand-person cohort sampled from those who received national health checkup services. All data include every diagnosis code and medications of each patient in all hospitals and clinics. For beneficiaries of national health checkup services, data include basic anthropometric measurements, laboratory values, past medical history, and family history. Despite the limited number of variables for the development of machine learning algorithms compared to electronic health records (EHRs) in hospitals, this type of data has the substantial advantages of a well-refined structured format and large sample size [<xref ref-type="bibr" rid="ref16">16</xref>]. The data structure of the NHIS cohort and the monthly claim data from every EHR in hospitals are the same; therefore, the developed patient-level prediction models can be implemented in any EHR system in South Korea. 
In this study, we aimed to develop practical patient-level prediction models of 7 major cancers with acceptable performances and explainability, which can be distributed easily in real-world environments.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Description</title>
        <p>We used an NHIS database to develop our cancer prediction models. The NHIS, a mandatory social insurance system, has collected health screening data at the national population level since the mid-1970s [<xref ref-type="bibr" rid="ref15">15</xref>]. As this is a centralized system, Korean health screening data can be centralized, while paid health care providers act on a per-service basis [<xref ref-type="bibr" rid="ref17">17</xref>]. The NHIS database consists of 2 different data sets: a health checkup cohort and a national sample cohort [<xref ref-type="bibr" rid="ref18">18</xref>]. We used the health checkup cohort for the learning process, which included training and internal validation, and used the remaining national sample cohort for external validation.</p>
        <p>The NHIS provides a free health checkup program to all NHI members every 2 years. The health checkup cohort contains a total of 514,866 patients’ health checkup records randomly extracted from health insurance members who have undergone a health checkup program. The national sample cohort contains about 1 million patient records corresponding to about 2.2% of the Korean population in 2002. This data set was collected by considering demographics, such as population, age, and geographic factors. Both data sets include social and economic eligibility variables, health resource utilization status, description, treatment details, disease type, prescription details, and clinic status. The NHIS data set statistics are presented in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Statistics of the National Health Insurance Service data sets (2002-2013).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="460"/>
            <col width="340"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Description</td>
                <td>Health checkup cohort, n</td>
                <td>National sample cohort, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Hospital</td>
                <td>51,920</td>
                <td>52,483</td>
              </tr>
              <tr valign="top">
                <td>Patients</td>
                <td>514,866</td>
                <td>1,113,656</td>
              </tr>
              <tr valign="top">
                <td>Prescriptions</td>
                <td>83,935,395</td>
                <td>83,935,395</td>
              </tr>
              <tr valign="top">
                <td>Visits</td>
                <td>96,534,359</td>
                <td>119,362,188</td>
              </tr>
              <tr valign="top">
                <td>Diagnostic codes (full code name)</td>
                <td>17,385</td>
                <td>19,626</td>
              </tr>
              <tr valign="top">
                <td>Diagnostic codes (first 3 digits)</td>
                <td>2160</td>
                <td>2319</td>
              </tr>
              <tr valign="top">
                <td>Annual patient visits, mean</td>
                <td>15.6</td>
                <td>8.9</td>
              </tr>
              <tr valign="top">
                <td>Diagnostic codes/visit, mean</td>
                <td>2.4</td>
                <td>2.5</td>
              </tr>
              <tr valign="top">
                <td>Drug/prescription, mean</td>
                <td>4.4</td>
                <td>4.4</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Study Population Definition</title>
        <p>It is mandatory that all cancer patients in South Korea be enrolled into a national cancer management program in the hospital where the cancer is diagnosed so that cancer patients only pay 5% of the total medical cost [<xref ref-type="bibr" rid="ref19">19</xref>]. This means that almost all cancer patients in South Korea can be identified by diagnosis codes registered in the NHIS database [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <p>We used the Korean Classification of Disease version 7, which is compatible with International Classification of Disease (ICD)-10, and defined the following 7 major cancers [<xref ref-type="bibr" rid="ref21">21</xref>]: liver cancer (malignant neoplasm of the liver and intrahepatic bile ducts), C22; lung cancer (malignant neoplasm of the bronchus and lung), C34; colorectal cancer (malignant neoplasm of the colon, rectosigmoid junction, and rectum), C18, C19, and C20; pancreatic cancer (malignant neoplasm of the pancreas), C25; stomach cancer (malignant neoplasm of the stomach), C16; breast cancer (malignant neoplasm of the breast), C50; and cervical cancer (malignant neoplasm of the cervix uteri), C53.</p>
        <p>The prevalence of each cancer is presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The number of cancer-free patients and the number of cancer patients diagnosed for each cancer.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="120"/>
            <col width="120"/>
            <col width="130"/>
            <col width="130"/>
            <col width="120"/>
            <col width="110"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>Patient type</td>
                <td>Liver</td>
                <td>Lung</td>
                <td>Colorectal</td>
                <td>Pancreatic</td>
                <td>Stomach</td>
                <td>Breast</td>
                <td>Cervical</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Free, n</td>
                <td>234,659</td>
                <td>233,931</td>
                <td>233,203</td>
                <td>235,633</td>
                <td>232,493</td>
                <td>91,982</td>
                <td>92,736</td>
              </tr>
              <tr valign="top">
                <td>Diagnosed, n</td>
                <td>1587</td>
                <td>2335</td>
                <td>2845</td>
                <td>551</td>
                <td>3679</td>
                <td>1029</td>
                <td>306</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Input Features and Algorithms</title>
        <p>First, we used basic features consisting of simple demographic information, including age and gender, health examination, and survey results (18 features, level 1). Second, we added 11 more features obtained from a questionnaire, including the patient's medical history and family medical history (29 features, level 2). Third, we included 10 specific disease diagnostic records that appeared significant through univariate analysis for each cancer (39 features, level 3). The 10 specific codes for each of the 7 cancers are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>To predict future cancers, we focused on cancer incidence within the next 5 years based on the time of screening. We first trained our predictive model with 4 common machine learning models: logistic regression (LR), random forest (RF), Light Gradient Boosting Machine (LGBM; a tree-based gradient boosting model), and multilayer perceptron (MLP). Further, we built a one-class embedding classifier (OCEC), which is a deep anomaly detection-based model (<xref rid="figure1" ref-type="fig">Figure 1</xref>). This method assumes that the data have one large class and several types of small anomalies not included in that class. This is an appropriate assumption because, while most people have normal screening records, few have cancer. To build our OCEC structure, we modified a deep one-class classification, the first deep learning–based anomaly detection model [<xref ref-type="bibr" rid="ref22">22</xref>]. We then added a small classifier to the latent space to predict future cancer. The hyperparameters used for training models are shown in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Concept of one-class embedding classifier.</p>
          </caption>
          <graphic xlink:href="medinform_v9i8e29807_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Model Evaluation Strategy</title>
        <p>We divided the entire health checkup cohort, with 80% placed into a training set and 20% placed into a validation set. The model was trained only with the training set while the internal validation set was not used in the learning process. After training, the model output a prediction score for the probability of developing cancer in the next 5 years after the input year.</p>
        <p>A cancer prediction problem is heavily imbalanced because the proportion of cancer-diagnosed patients is very small. In our data, the proportions of cancer-diagnosed patients were &#60;2% for all 7 cancers. Thus, we used the area under the receiver operating characteristic curve (AUROC) and area under the precision recall curve (AUPRC) scores to evaluate our models. The AUROC is a metric with values between 0 and 1 that is widely used to evaluate models on imbalanced problems, while the AUPRC combines recall and precision and corresponds to the average of the precision according to the precision recall curve. The baseline for AUROC is always 0.5, meaning a random classifier would produce an AUROC of 0.5. However, with AUPRC, the baseline is equal to the fraction of positive cancer cases (number of positive examples/total number of examples). The baseline AUPRC for each cancer in both the internal and external validation sets is shown in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>The baseline area under the precision recall curve for the internal and external validation sets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="120"/>
            <col width="120"/>
            <col width="130"/>
            <col width="130"/>
            <col width="120"/>
            <col width="110"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>Validation set</td>
                <td>Liver</td>
                <td>Lung</td>
                <td>Colorectal</td>
                <td>Pancreatic</td>
                <td>Stomach</td>
                <td>Breast</td>
                <td>Cervical</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Internal validation</td>
                <td>4.45×10<sup>–3</sup></td>
                <td>6.03×10<sup>–3</sup></td>
                <td>7.72×10<sup>–3</sup></td>
                <td>1.50×10<sup>–3</sup></td>
                <td>1.04×10<sup>–2</sup></td>
                <td>7.65×10<sup>–3</sup></td>
                <td>2.39×10<sup>–3</sup></td>
              </tr>
              <tr valign="top">
                <td>External validation</td>
                <td>2.96×10<sup>–3</sup></td>
                <td>3.86×10<sup>–3</sup></td>
                <td>5.52×10<sup>–3</sup></td>
                <td>1.01×10<sup>–3</sup></td>
                <td>6.65×10<sup>–3</sup></td>
                <td>7.97×10<sup>–3</sup></td>
                <td>2.22×10<sup>–3</sup></td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>We evaluated the above metrics for both internal and external validation sets and compared the results. Additionally, for the external data set, we used the survival analysis method. We plotted Kaplan-Meier cumulative density curves to see the actual effectiveness of the predictive score. The study flow chart for learning and verification of the overall process is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
        <p>The NHIS institutional review board approved all data requests for research purposes (NHIS-2017-2-326). Because this public database is fully anonymized, institutional approval of Seoul National University Bundang Hospital (SNUBH) was waived by the institutional review board (X-2009-634-902).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Flow chart of the overall process. AUROC: area under the receiver operating characteristic curve.</p>
          </caption>
          <graphic xlink:href="medinform_v9i8e29807_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Performance of Cancer Prediction Models</title>
        <p><xref ref-type="table" rid="table4">Table 4</xref> shows the internal validation results for each cancer across the 5 models. Overall, the LGBM and deep learning models performed better than did LR and RF. The former models performed well in terms of AUROC and AUPRC scores. LR, the most widely used classic model, showed low AUPRC scores, while RF had a low AUROC.</p>
        <p>Notably, more than half of the OCEC AUROC scores were top rated compared to other models. Two models, OCEC and MLP, are both deep learning structured models. However, OCEC uses dense dimension reduction and performed better for both AUROC and AUPRC scores compared to the MLP model. This shows that the anomaly-based one-class classification model can be a suitable deep learning structure for rare disease prediction.</p>
        <p>When looking at the internal validation results of each cancer, liver and lung cancers showed the best results (AUROC&#62;0.8), followed by stomach, pancreatic, and colorectal cancers (0.8&#62;AUROC&#62;0.7). Cervical and breast cancers (both female cancers) showed the lowest results (0.7&#62;AUROC&#62;0.6). The same findings also appeared in the external validation (<xref ref-type="table" rid="table5">Table 5</xref>).</p>
        <p>According to feature level, the results tended to improve as feature level increased from level 1 to 3, but this was not significant. However, in some cases, the opposite tendency was observed.</p>
        <p>The findings for the external validation score were similar to those of the internal score. Interestingly, the external validation scores (<xref ref-type="table" rid="table5">Table 5</xref>) were higher than the internal ones overall.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Internal validation performance of outcome prediction across models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="110"/>
            <col width="90"/>
            <col width="90"/>
            <col width="70"/>
            <col width="0"/>
            <col width="80"/>
            <col width="80"/>
            <col width="0"/>
            <col width="90"/>
            <col width="90"/>
            <col width="0"/>
            <col width="80"/>
            <col width="70"/>
            <col width="0"/>
            <col width="80"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td rowspan="2">Cancer  <break/>  
            type</td>
                <td rowspan="2">Feature  <break/>  
            level</td>
                <td colspan="3">LGBM<sup>a</sup></td>
                <td colspan="3">LR<sup>b</sup></td>
                <td colspan="3">RF<sup>c</sup></td>
                <td colspan="3">MLP<sup>d</sup></td>
                <td colspan="2">OCEC<sup>e</sup></td>
              </tr>
              <tr valign="bottom">
                <td>AUROC<sup>f</sup></td>
                <td>AUPRC<sup>g</sup></td>
                <td colspan="2">AUROC</td>
                <td>AUPRC</td>
                <td colspan="2">AUROC</td>
                <td>AUPRC</td>
                <td colspan="2">AUROC</td>
                <td>AUPRC</td>
                <td colspan="2">AUROC</td>
                <td>AUPRC</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="16">
                  <bold>Liver</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.858</td>
                <td>0.359</td>
                <td colspan="2">0.836</td>
                <td>0.045</td>
                <td colspan="2">0.748</td>
                <td>0.359</td>
                <td colspan="2">0.858</td>
                <td>0.296</td>
                <td colspan="2">0.857</td>
                <td>0.313</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.868</td>
                <td>0.363</td>
                <td colspan="2">0.841</td>
                <td>0.048</td>
                <td colspan="2">0.770</td>
                <td>0.342</td>
                <td colspan="2">0.856</td>
                <td>0.297</td>
                <td colspan="2">0.860</td>
                <td>0.301</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.871</td>
                <td>0.383</td>
                <td colspan="2">0.852</td>
                <td>0.080</td>
                <td colspan="2">0.788</td>
                <td>0.361</td>
                <td colspan="2">0.860</td>
                <td>0.315</td>
                <td colspan="2">0.868</td>
                <td>0.334</td>
              </tr>
              <tr valign="top">
                <td colspan="16">
                  <bold>Lung</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.845</td>
                <td>0.396</td>
                <td colspan="2">0.823</td>
                <td>0.106</td>
                <td colspan="2">0.735</td>
                <td>0.366</td>
                <td colspan="2">0.845</td>
                <td>0.360</td>
                <td colspan="2">0.849</td>
                <td>0.382</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.845</td>
                <td>0.395</td>
                <td colspan="2">0.822</td>
                <td>0.110</td>
                <td colspan="2">0.750</td>
                <td>0.366</td>
                <td colspan="2">0.832</td>
                <td>0.338</td>
                <td colspan="2">0.841</td>
                <td>0.338</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.845</td>
                <td>0.401</td>
                <td colspan="2">0.829</td>
                <td>0.130</td>
                <td colspan="2">0.754</td>
                <td>0.367</td>
                <td colspan="2">0.841</td>
                <td>0.345</td>
                <td colspan="2">0.843</td>
                <td>0.343</td>
              </tr>
              <tr valign="top">
                <td colspan="16">
                  <bold>Colorectal</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.790</td>
                <td>0.385</td>
                <td colspan="2">0.764</td>
                <td>0.055</td>
                <td colspan="2">0.707</td>
                <td>0.366</td>
                <td colspan="2">0.794</td>
                <td>0.347</td>
                <td colspan="2">0.795</td>
                <td>0.371</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.792</td>
                <td>0.387</td>
                <td colspan="2">0.767</td>
                <td>0.063</td>
                <td colspan="2">0.701</td>
                <td>0.363</td>
                <td colspan="2">0.790</td>
                <td>0.321</td>
                <td colspan="2">0.798</td>
                <td>0.342</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.794</td>
                <td>0.385</td>
                <td colspan="2">0.769</td>
                <td>0.075</td>
                <td colspan="2">0.704</td>
                <td>0.360</td>
                <td colspan="2">0.791</td>
                <td>0.322</td>
                <td colspan="2">0.796</td>
                <td>0.342</td>
              </tr>
              <tr valign="top">
                <td colspan="16">
                  <bold>Pancreatic</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.723</td>
                <td>0.300</td>
                <td colspan="2">0.724</td>
                <td>0.017</td>
                <td colspan="2">0.676</td>
                <td>0.316</td>
                <td colspan="2">0.744</td>
                <td>0.234</td>
                <td colspan="2">0.746</td>
                <td>0.259</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.720</td>
                <td>0.281</td>
                <td colspan="2">0.727</td>
                <td>0.018</td>
                <td colspan="2">0.669</td>
                <td>0.309</td>
                <td colspan="2">0.725</td>
                <td>0.240</td>
                <td colspan="2">0.745</td>
                <td>0.240</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.723</td>
                <td>0.271</td>
                <td colspan="2">0.730</td>
                <td>0.018</td>
                <td colspan="2">0.682</td>
                <td>0.311</td>
                <td colspan="2">0.730</td>
                <td>0.225</td>
                <td colspan="2">0.743</td>
                <td>0.231</td>
              </tr>
              <tr valign="top">
                <td colspan="16">
                  <bold>Stomach</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.787</td>
                <td>0.385</td>
                <td colspan="2">0.768</td>
                <td>0.086</td>
                <td colspan="2">0.713</td>
                <td>0.353</td>
                <td colspan="2">0.793</td>
                <td>0.348</td>
                <td colspan="2">0.798</td>
                <td>0.367</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.790</td>
                <td>0.382</td>
                <td colspan="2">0.770</td>
                <td>0.092</td>
                <td colspan="2">0.704</td>
                <td>0.351</td>
                <td colspan="2">0.796</td>
                <td>0.345</td>
                <td colspan="2">0.800</td>
                <td>0.345</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.791</td>
                <td>0.383</td>
                <td colspan="2">0.772</td>
                <td>0.108</td>
                <td colspan="2">0.715</td>
                <td>0.351</td>
                <td colspan="2">0.787</td>
                <td>0.329</td>
                <td colspan="2">0.795</td>
                <td>0.329</td>
              </tr>
              <tr valign="top">
                <td colspan="16">
                  <bold>Breast</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.684</td>
                <td>0.344</td>
                <td colspan="2">0.689</td>
                <td>0.077</td>
                <td colspan="2">0.666</td>
                <td>0.343</td>
                <td colspan="2">0.705</td>
                <td>0.325</td>
                <td colspan="2">0.713</td>
                <td>0.332</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.696</td>
                <td>0.345</td>
                <td colspan="2">0.696</td>
                <td>0.083</td>
                <td colspan="2">0.681</td>
                <td>0.346</td>
                <td colspan="2">0.706</td>
                <td>0.324</td>
                <td colspan="2">0.711</td>
                <td>0.327</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.722</td>
                <td>0.357</td>
                <td colspan="2">0.733</td>
                <td>0.129</td>
                <td colspan="2">0.689</td>
                <td>0.353</td>
                <td colspan="2">0.734</td>
                <td>0.339</td>
                <td colspan="2">0.749</td>
                <td>0.345</td>
              </tr>
              <tr valign="top">
                <td colspan="16">
                  <bold>Cervical</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.647</td>
                <td>0.268</td>
                <td colspan="2">0.667</td>
                <td>0.013</td>
                <td colspan="2">0.656</td>
                <td>0.273</td>
                <td colspan="2">0.671</td>
                <td>0.263</td>
                <td colspan="2">0.690</td>
                <td>0.265</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.672</td>
                <td>0.271</td>
                <td colspan="2">0.669</td>
                <td>0.012</td>
                <td colspan="2">0.632</td>
                <td>0.274</td>
                <td colspan="2">0.660</td>
                <td>0.266</td>
                <td colspan="2">0.670</td>
                <td>0.266</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.653</td>
                <td>0.296</td>
                <td colspan="2">0.612</td>
                <td>0.027</td>
                <td colspan="2">0.679</td>
                <td>0.301</td>
                <td colspan="2">0.638</td>
                <td>0.275</td>
                <td colspan="2">0.645</td>
                <td>0.279</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>LGBM: Light Gradient Boosting Model.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>RF: random forest.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>MLP: multilayer perceptron.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>OCEC: one-class embedding classifier.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>AUPRC: area under the precision-recall curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>External performance of outcome prediction across models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="90"/>
            <col width="90"/>
            <col width="70"/>
            <col width="80"/>
            <col width="80"/>
            <col width="0"/>
            <col width="80"/>
            <col width="90"/>
            <col width="0"/>
            <col width="80"/>
            <col width="80"/>
            <col width="0"/>
            <col width="80"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td rowspan="2">Cancer  <break/>  
            type</td>
                <td rowspan="2">Feature  <break/>  
            level</td>
                <td colspan="2">LGBM<sup>a</sup></td>
                <td colspan="3">LR<sup>b</sup></td>
                <td colspan="3">RF<sup>c</sup></td>
                <td colspan="3">MLP<sup>d</sup></td>
                <td colspan="2">OCEC<sup>e</sup></td>
              </tr>
              <tr valign="bottom">
                <td>AUROC<sup>f</sup></td>
                <td>AUPRC<sup>g</sup></td>
                <td>AUROC</td>
                <td>AUPRC</td>
                <td colspan="2">AUROC</td>
                <td>AUPRC</td>
                <td colspan="2">AUROC</td>
                <td>AUPRC</td>
                <td colspan="2">AUROC</td>
                <td>AUPRC</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="15">
                  <bold>Liver</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.910</td>
                <td>0.485</td>
                <td>0.893</td>
                <td>0.065</td>
                <td colspan="2">0.815</td>
                <td>0.502</td>
                <td colspan="2">0.911</td>
                <td>0.433</td>
                <td colspan="2">0.912</td>
                <td>0.442</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.909</td>
                <td>0.485</td>
                <td>0.895</td>
                <td>0.067</td>
                <td colspan="2">0.826</td>
                <td>0.488</td>
                <td colspan="2">0.900</td>
                <td>0.391</td>
                <td colspan="2">0.911</td>
                <td>0.433</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.915</td>
                <td>0.514</td>
                <td>0.907</td>
                <td>0.120</td>
                <td colspan="2">0.838</td>
                <td>0.527</td>
                <td colspan="2">0.910</td>
                <td>0.463</td>
                <td colspan="2">0.919</td>
                <td>0.471</td>
              </tr>
              <tr valign="top">
                <td colspan="15">
                  <bold>Lung</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.896</td>
                <td>0.465</td>
                <td>0.875</td>
                <td>0.097</td>
                <td colspan="2">0.789</td>
                <td>0.468</td>
                <td colspan="2">0.898</td>
                <td>0.431</td>
                <td colspan="2">0.897</td>
                <td>0.450</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.895</td>
                <td>0.463</td>
                <td>0.875</td>
                <td>0.104</td>
                <td colspan="2">0.788</td>
                <td>0.465</td>
                <td colspan="2">0.886</td>
                <td>0.296</td>
                <td colspan="2">0.894</td>
                <td>0.401</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.897</td>
                <td>0.464</td>
                <td>0.879</td>
                <td>0.118</td>
                <td colspan="2">0.794</td>
                <td>0.471</td>
                <td colspan="2">0.887</td>
                <td>0.402</td>
                <td colspan="2">0.894</td>
                <td>0.408</td>
              </tr>
              <tr valign="top">
                <td colspan="15">
                  <bold>Colorectal</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.872</td>
                <td>0.455</td>
                <td>0.858</td>
                <td>0.070</td>
                <td colspan="2">0.776</td>
                <td>0.482</td>
                <td colspan="2">0.883</td>
                <td>0.426</td>
                <td colspan="2">0.887</td>
                <td>0.449</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.874</td>
                <td>0.453</td>
                <td>0.858</td>
                <td>0.076</td>
                <td colspan="2">0.780</td>
                <td>0.481</td>
                <td colspan="2">0.874</td>
                <td>0.394</td>
                <td colspan="2">0.887</td>
                <td>0.423</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.877</td>
                <td>0.455</td>
                <td>0.859</td>
                <td>0.085</td>
                <td colspan="2">0.776</td>
                <td>0.473</td>
                <td colspan="2">0.882</td>
                <td>0.393</td>
                <td colspan="2">0.884</td>
                <td>0.415</td>
              </tr>
              <tr valign="top">
                <td colspan="15">
                  <bold>Pancreatic</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.891</td>
                <td>0.420</td>
                <td>0.884</td>
                <td>0.029</td>
                <td colspan="2">0.753</td>
                <td>0.456</td>
                <td colspan="2">0.898</td>
                <td>0.360</td>
                <td colspan="2">0.904</td>
                <td>0.336</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.888</td>
                <td>0.405</td>
                <td>0.884</td>
                <td>0.030</td>
                <td colspan="2">0.747</td>
                <td>0.450</td>
                <td colspan="2">0.883</td>
                <td>0.335</td>
                <td colspan="2">0.902</td>
                <td>0.337</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.885</td>
                <td>0.407</td>
                <td>0.886</td>
                <td>0.039</td>
                <td colspan="2">0.759</td>
                <td>0.450</td>
                <td colspan="2">0.883</td>
                <td>0.323</td>
                <td colspan="2">0.897</td>
                <td>0.336</td>
              </tr>
              <tr valign="top">
                <td colspan="15">
                  <bold>Stomach</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.889</td>
                <td>0.481</td>
                <td>0.863</td>
                <td>0.088</td>
                <td colspan="2">0.795</td>
                <td>0.478</td>
                <td colspan="2">0.891</td>
                <td>0.457</td>
                <td colspan="2">0.894</td>
                <td>0.440</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.891</td>
                <td>0.480</td>
                <td>0.864</td>
                <td>0.095</td>
                <td colspan="2">0.793</td>
                <td>0.479</td>
                <td colspan="2">0.887</td>
                <td>0.422</td>
                <td colspan="2">0.893</td>
                <td>0.436</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.889</td>
                <td>0.478</td>
                <td>0.864</td>
                <td>0.109</td>
                <td colspan="2">0.792</td>
                <td>0.473</td>
                <td colspan="2">0.885</td>
                <td>0.401</td>
                <td colspan="2">0.890</td>
                <td>0.413</td>
              </tr>
              <tr valign="top">
                <td colspan="15">
                  <bold>Breast</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.763</td>
                <td>0.485</td>
                <td>0.704</td>
                <td>0.108</td>
                <td colspan="2">0.750</td>
                <td>0.492</td>
                <td colspan="2">0.686</td>
                <td>0.406</td>
                <td colspan="2">0.753</td>
                <td>0.421</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.771</td>
                <td>0.488</td>
                <td>0.716</td>
                <td>0.106</td>
                <td colspan="2">0.745</td>
                <td>0.492</td>
                <td colspan="2">0.678</td>
                <td>0.396</td>
                <td colspan="2">0.697</td>
                <td>0.410</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.780</td>
                <td>0.497</td>
                <td>0.759</td>
                <td>0.143</td>
                <td colspan="2">0.757</td>
                <td>0.491</td>
                <td colspan="2">0.730</td>
                <td>0.411</td>
                <td colspan="2">0.745</td>
                <td>0.429</td>
              </tr>
              <tr valign="top">
                <td colspan="15">
                  <bold>Cervical</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="3">
                  <break/>
                </td>
                <td>Level 1</td>
                <td>0.729</td>
                <td>0.364</td>
                <td>0.742</td>
                <td>0.021</td>
                <td colspan="2">0.722</td>
                <td>0.375</td>
                <td colspan="2">0.671</td>
                <td>0.293</td>
                <td colspan="2">0.735</td>
                <td>0.336</td>
              </tr>
              <tr valign="top">
                <td>Level 2</td>
                <td>0.721</td>
                <td>0.370</td>
                <td>0.744</td>
                <td>0.018</td>
                <td colspan="2">0.715</td>
                <td>0.377</td>
                <td colspan="2">0.710</td>
                <td>0.338</td>
                <td colspan="2">0.732</td>
                <td>0.334</td>
              </tr>
              <tr valign="top">
                <td>Level 3</td>
                <td>0.749</td>
                <td>0.386</td>
                <td>0.760</td>
                <td>0.058</td>
                <td colspan="2">0.731</td>
                <td>0.400</td>
                <td colspan="2">0.744</td>
                <td>0.349</td>
                <td colspan="2">0.744</td>
                <td>0.354</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>LGBM: Light Gradient Boosting Model.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>RF: random forest.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>MLP: multilayer perceptron.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup>OCEC: one-class embedding classifier.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table5fn7">
              <p><sup>g</sup>AUPRC: area under the precision-recall curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Survival Analysis</title>
        <p>To unveil the actual cancer incidence according to the predicted value, we used a survival analysis method. We analyzed the prediction scores of the LGBM model, one of the best performing of the aforementioned models. The prediction score indicates the probability of developing cancer within 5 years from the screening date. Therefore, the closer the prediction score is to 1, the likelier it is that cancer will actually occur after a certain time. We analyzed 5 groups of patients by prediction scores: group 1 (prediction score ≥0.95), group 2 (prediction score ≥0.90), group 3 (prediction score ≥0.75), group 4 (prediction score ≥0.50), and total patient groups. We drew Kaplan-Meier cumulative density curves for each group and compared them. In <xref rid="figure3" ref-type="fig">Figure 3</xref>, the x-axis represents time from the screening date, and the y-axis the rate of cancer incidence within the group. All these analyses were performed with external validation data. As the proportion of cancer patients is &#60;1% for all cancers, the cumulative density curves are attached to the x-axis. The density curve of the group with the higher probability score is located at the higher cumulative density value (y-axis). These trends were collectively observed in all cancers and show the reliability of our models. Significantly, &#62;80% of patients in group 1 actually developed cancer within 5 years.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Kaplan-Meier cumulative density curves.</p>
          </caption>
          <graphic xlink:href="medinform_v9i8e29807_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Model Explainability</title>
        <p>With the LGBM and Shapley Additive Explanations (SHAP) method we can explain how the model outputs cancer prediction scores [<xref ref-type="bibr" rid="ref23">23</xref>]. We can evaluate which features are the most important to predicting future cancer. Moreover, it is possible to know whether a feature has a positive effect or a negative effect.</p>
        <p><xref ref-type="table" rid="table6">Table 6</xref> shows the top 5 features for predicting cancer incidence for each type of cancer. Overall, age was the most important variable as was gender except for women’s cancers. In addition, drinking frequency, alcohol consumption, and total cholesterol levels were all relevant factors.</p>
        <p>In particular, aspartate aminotransferase and gamma-glutamyl transferase levels are important for liver cancer. Smoking frequency is an important variable in lung cancer but not in other cancers. Similarly, drinking is the third most important feature for stomach cancer. In breast and pancreatic cancers, blood glucose levels were a more important variable than they were for other cancers. For further details on SHAP values including correlations between each variable and cancer prediction, see <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Top 5 features by Shapley Additive Explanations. </p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="110"/>
            <col width="150"/>
            <col width="160"/>
            <col width="150"/>
            <col width="140"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Liver</td>
                <td>Lung</td>
                <td>Colorectal</td>
                <td>Pancreatic</td>
                <td>Stomach</td>
                <td>Breast</td>
                <td>Cervical</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Age</td>
                <td>Age</td>
                <td>Age</td>
                <td>Age</td>
                <td>Age</td>
                <td>Age</td>
                <td>Age</td>
              </tr>
              <tr valign="top">
                <td>GTP<sup>a</sup></td>
                <td>Smoking</td>
                <td>Sex</td>
                <td>Hemoglobin</td>
                <td>Sex</td>
                <td>BMI</td>
                <td>Fasting glucose</td>
              </tr>
              <tr valign="top">
                <td>AST<sup>b</sup></td>
                <td>Sex</td>
                <td>BMI</td>
                <td>Total cholesterol</td>
                <td>BMI</td>
                <td>Total cholesterol</td>
                <td>BMI</td>
              </tr>
              <tr valign="top">
                <td>Total cholesterol</td>
                <td>BMI</td>
                <td>Total cholesterol</td>
                <td>BP<sup>c</sup> (high)</td>
                <td>Drinking habit</td>
                <td>Fasting glucose</td>
                <td>Conjunctivitis</td>
              </tr>
              <tr valign="top">
                <td>BMI</td>
                <td>GTP</td>
                <td>Fasting glucose</td>
                <td>BMI</td>
                <td>Hemoglobin</td>
                <td>BP (high)</td>
                <td>Total cholesterol</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>GTP: gamma-glutamyl transpeptidase (gamma-glutamyl transferase).</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>AST: aspartate aminotransferase.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>BP: blood pressure.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec>
      <title>Discussion</title>
      <p>In this study, we used nationwide population-based health care data to construct a machine learning model to predict the future incidence of 7 common types of cancer: liver, stomach, colorectal, lung, pancreatic, breast, and cervical cancer.</p>
      <p>Among the 5 distinct models, the LGBM and OCEC, which is our original structure, performed best. Both models had a higher AUROC and AUPRC than did the other models. Interestingly, OCEC scored best in terms of AUROC score and outperformed the normal deep learning method (MLP). Our dense dimension reduction method with one-class anomaly insights was the best model structure.</p>
      <p>All models performed well on the external validation set; therefore, it was a success in terms of generalization. Actually, the external validation results were even better than those of the internal validation, thus ensuring the generalizability of our models. We believe that this result was obtained due to the different sampling methods used between the training and validation cohorts: the training data set consisted of only those with health checkup information, whereas the validation data set was sampled based on patients' demographic information. As such, the national sample cohort has a similar distribution to the health checkup cohort. In addition, the national sample cohort has a sufficient number of data samples, thus producing good external validation results.</p>
      <p>We drew a Kaplan-Meier cumulative density curve for the LGBM model, which is the traditional way of determining whether the marker (prediction score in this case) is suitable to predict cancer occurrence. More than 80% of the people with a prediction score ≥0.95 actually developed cancer within 5 years from the screening date. This is a significant result, which shows that our model can be a powerful tool for identifying high-risk groups. These high-risk groups could then take precautions before the cancer develops. In female cancers, such as breast and cervical cancer, the predictive power was lower than in other cancers. This is probably because both the size of the total female data sample and the number of cancer patients were relatively small. On the other hand, the predictive power for liver and lung cancer was very high. Our data set included liver-related features such as glutamic oxaloacetic transaminase and glutamate pyruvate transaminase. Moreover, we believe that smoking- and drinking-related features also helped predict these cancers. Accordingly, we can conclude that securing high-quality features and a large amount of data can improve predictive power.</p>
      <p>There have been previous attempts to develop cancer prediction models with various input features. Japanese researchers developed a prediction model for the 10-year risk of hepatocellular carcinoma in middle-aged Japanese people using data obtained from 17,654 Japanese aged 40 to 69 years who participated in regular health checkups [<xref ref-type="bibr" rid="ref24">24</xref>]. They obtained a higher AUROC (0.933) than did our models (0.912 in level 1 feature set). However, they did not provide AUPRC, which is important in real-world settings. Furthermore, they used viral markers of hepatitis virus B and C, which are not commonly checked in the normal population. Compared to the previous model, our model used general input features that are easily obtainable, and we acquired a comparable AUROC to the previous model. A Korean research group developed a risk prediction model using Cox proportional hazard regression models for colorectal cancer with a population of 846,559 men and 479,449 women who participated in health examinations by the National Health Insurance Corporation, and they obtained C statistics between 0.69 and 0.78 [<xref ref-type="bibr" rid="ref25">25</xref>]. They used a similar data set with a different timespan (from 1997 to 1997) from our data set and obtained a similar performance to our model (0.730 vs 0.780). This means the performance of classifiers tends to depend on the training data set characteristics rather than the data and time windows. In another study, a multivariable lung cancer risk prediction model including low-dose computed tomography screening results from 22,229 participants obtained an AUROC of 0.761, which is lower than that of our model (0.898 in the MLP model) [<xref ref-type="bibr" rid="ref26">26</xref>]. Importantly, our model showed a higher performance with an AUROC of 0.875 in a simple linear model (logistic regression with level 1 input features).</p>
      <p>In terms of real-world implementation, this study has several implications. Thus far, many studies using machine learning have been conducted on EHR time sequence data. One study aimed to predict heart failure from EHR data [<xref ref-type="bibr" rid="ref27">27</xref>], and others focused on diabetes development [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>] or hypertension [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Furthermore, a few studies have used nationwide claim health checkup data to create a cancer prediction model [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. To solve the overdiagnosis problem of cancer screening programs resulting in unnecessary intervention, accurate, easy-to-implement, patient-level models should be developed. Applying the developed algorithms in previous studies to hospital sites requires considerable effort because the data structure of the developed model differs from that of hospitals. However, our models have the same data structure as the national health care claim data generated on a monthly basis, which means that our models can be directly applied to EHR and makes this study meaningful in terms of its easy applicability. In addition, since we applied an explainable model to LGBM, every doctor can access the modifiable risk factors from the predicted results.</p>
      <p>Our research has several limitations. First, this study used only South Korean nationwide claim data. Depending on the country, the performance of the developed algorithms can differ. The value of NHIS data is well-known, and the data have been used in previous epidemiologic studies. Furthermore, we validated the developed algorithms using another database. Future additional external model validations using claim data from other countries can provide robustness to the models. Second, comparative effectiveness research is needed to prove the usefulness of the developed models. Conventional screening models can be compared to new patient-level prediction models in terms of cost and the number of false-positives avoided by the new models.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Ten disease codes used as features for each cancer.</p>
        <media xlink:href="medinform_v9i8e29807_app1.docx" xlink:title="DOCX File , 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Hyperparameters used for training models.</p>
        <media xlink:href="medinform_v9i8e29807_app2.docx" xlink:title="DOCX File , 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Shapley Additive Explanations (SHAP) summary plot for each cancer.</p>
        <media xlink:href="medinform_v9i8e29807_app3.docx" xlink:title="DOCX File , 378 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUPRC</term>
          <def>
            <p>area under precision recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUROC</term>
          <def>
            <p>area under receiver operator characteristics curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LGBM</term>
          <def>
            <p>Light Gradient Boosting Machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MLP</term>
          <def>
            <p>multilayer perceptron</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NCCP</term>
          <def>
            <p>national cancer control program</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NHI</term>
          <def>
            <p>National Health Insurance</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NHIS</term>
          <def>
            <p>National Health Insurance System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">OCEC</term>
          <def>
            <p>one-class embedding classifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SHAP</term>
          <def>
            <p>Shapley Additive Explanations</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">SNUBH</term>
          <def>
            <p>Seoul National University Bundang Hospital</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by the SNUBH Research Fund (grant #14-2017-0018), the National Research Foundation of Korea grant funded by the Korea government (NRF-2017R1E1A1A03070105 and NRF-2019R1A5A1028324), and the Institute for Information &#38; Communications Technology Promotion grant funded by the Korea government (Artificial Intelligence Graduate School Program [POSTECH]; #2019-0-01906).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Global Cancer Observatory</article-title>
          <source>World Health Organization</source>
          <access-date>2021-04-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://gco.iarc.fr/">https://gco.iarc.fr/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anand</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kunnumakkara</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Kunnumakara</surname>
              <given-names>Ajaikumar B</given-names>
            </name>
            <name name-style="western">
              <surname>Sundaram</surname>
              <given-names>Chitra</given-names>
            </name>
            <name name-style="western">
              <surname>Harikumar</surname>
              <given-names>Kuzhuvelil B</given-names>
            </name>
            <name name-style="western">
              <surname>Tharakan</surname>
              <given-names>Sheeja T</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>Oiki S</given-names>
            </name>
            <name name-style="western">
              <surname>Sung</surname>
              <given-names>Bokyung</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>Bharat B</given-names>
            </name>
          </person-group>
          <article-title>Cancer is a preventable disease that requires major lifestyle changes</article-title>
          <source>Pharm Res</source>
          <year>2008</year>
          <month>09</month>
          <volume>25</volume>
          <issue>9</issue>
          <fpage>2097</fpage>
          <lpage>116</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18626751"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11095-008-9661-9</pub-id>
          <pub-id pub-id-type="medline">18626751</pub-id>
          <pub-id pub-id-type="pmcid">PMC2515569</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Centers for Disease Control and Prevention (CDC)</collab>
          </person-group>
          <article-title>Cancer screening - United States, 2010</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2012</year>
          <month>01</month>
          <day>27</day>
          <volume>61</volume>
          <issue>3</issue>
          <fpage>41</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/mmwr/preview/mmwrhtml/mm6103a1.htm"/>
          </comment>
          <pub-id pub-id-type="medline">22278157</pub-id>
          <pub-id pub-id-type="pii">mm6103a1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fracheboud</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Koning</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Boer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Groenewoud</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Verbeek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Broeders</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>van Ineveld</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hendriks</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Bruyn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>van der Maas</surname>
              <given-names>P</given-names>
            </name>
            <collab>National Evaluation Team for Breast cancer screening in The Netherlands</collab>
          </person-group>
          <article-title>Nationwide breast cancer screening programme fully implemented in The Netherlands</article-title>
          <source>Breast</source>
          <year>2001</year>
          <month>02</month>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.1054/brst.2000.0212</pub-id>
          <pub-id pub-id-type="medline">14965550</pub-id>
          <pub-id pub-id-type="pii">S0960-9776(00)90212-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Koning</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Assessment of nationwide cancer-screening programmes</article-title>
          <source>The Lancet</source>
          <year>2000</year>
          <month>01</month>
          <volume>355</volume>
          <issue>9198</issue>
          <fpage>80</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(99)00419-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Romero</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Trapani</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tittenbrun</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Given</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hohman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Torode</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Boniol</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ilbawi</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>National cancer control plans: a global analysis</article-title>
          <source>The Lancet Oncology</source>
          <year>2018</year>
          <month>10</month>
          <volume>19</volume>
          <issue>10</issue>
          <fpage>e546</fpage>
          <lpage>e555</lpage>
          <pub-id pub-id-type="doi">10.1016/s1470-2045(18)30681-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suh</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>National cancer screening program for gastric cancer in Korea: Nationwide treatment benefit and cost</article-title>
          <source>Cancer</source>
          <year>2020</year>
          <month>01</month>
          <day>01</day>
          <volume>126</volume>
          <issue>9</issue>
          <fpage>1929</fpage>
          <lpage>1939</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/cncr.32753"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/cncr.32753</pub-id>
          <pub-id pub-id-type="medline">32031687</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Geller</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Greinert</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sinclair</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Weinstock</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Aitken</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Boniol</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Capellaro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Doré</surname>
              <given-names>Jean-Francois</given-names>
            </name>
            <name name-style="western">
              <surname>Elwood</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fletcher</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Gallagher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gandini</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Halpern</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Katalinic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Marghoob</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Nolte</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schüz</surname>
              <given-names>Joachim</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Volkmer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Breitbart</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A nationwide population-based skin cancer screening in Germany: proceedings of the first meeting of the International Task Force on Skin Cancer Screening and Prevention (September 24 and 25, 2009)</article-title>
          <source>Cancer Epidemiol</source>
          <year>2010</year>
          <month>06</month>
          <volume>34</volume>
          <issue>3</issue>
          <fpage>355</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.canep.2010.03.006</pub-id>
          <pub-id pub-id-type="medline">20381443</pub-id>
          <pub-id pub-id-type="pii">S1877-7821(10)00039-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <article-title>Published Online First:3 February 2017</article-title>
          <source>WHO &#124; National Cancer Control Programmes (NCCP)</source>
          <access-date>2021-04-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/cancer/nccp/en/">https://www.who.int/cancer/nccp/en/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Yeonju</given-names>
            </name>
            <name name-style="western">
              <surname>Jun</surname>
              <given-names>Jae Kwan</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Kui Sun</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Hoo-Yeon</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Eun-Cheol</given-names>
            </name>
          </person-group>
          <article-title>Overview of the National Cancer screening programme and the cancer screening status in Korea</article-title>
          <source>Asian Pac J Cancer Prev</source>
          <year>2011</year>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>725</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://journal.waocp.org/?sid=Entrez:PubMed&#38;id=pmid:21627372&#38;key=2011.12.3.725"/>
          </comment>
          <pub-id pub-id-type="medline">21627372</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gandomkar</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Norsuddin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Thoms</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Characteristics of frequently recalled false positive cases in screening mammography</article-title>
          <year>2020</year>
          <conf-name>The 15th International Workshop on Breast Imaging (IWBI2020)</conf-name>
          <conf-date>24-27 May 2020</conf-date>
          <conf-loc>Leuven, Belgium</conf-loc>
          <pub-id pub-id-type="doi">10.1117/12.2560290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Mothersill</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Seymour</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>McNeill</surname>
              <given-names>FE</given-names>
            </name>
          </person-group>
          <article-title>Is the false-positive rate in mammography in North America too high?</article-title>
          <source>Br J Radiol</source>
          <year>2016</year>
          <month>09</month>
          <volume>89</volume>
          <issue>1065</issue>
          <fpage>20160045</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27187600"/>
          </comment>
          <pub-id pub-id-type="doi">10.1259/bjr.20160045</pub-id>
          <pub-id pub-id-type="medline">27187600</pub-id>
          <pub-id pub-id-type="pmcid">PMC5124917</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Enderling</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A new paradigm for personalized cancer screening</article-title>
          <source>bioRxiv</source>
          <year>2018</year>
          <access-date>2021-04-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.biorxiv.org/content/10.1101/265959v1">https://www.biorxiv.org/content/10.1101/265959v1</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Román</surname>
              <given-names>Marta</given-names>
            </name>
            <name name-style="western">
              <surname>Sala</surname>
              <given-names>Maria</given-names>
            </name>
            <name name-style="western">
              <surname>Domingo</surname>
              <given-names>Laia</given-names>
            </name>
            <name name-style="western">
              <surname>Posso</surname>
              <given-names>Margarita</given-names>
            </name>
            <name name-style="western">
              <surname>Louro</surname>
              <given-names>Javier</given-names>
            </name>
            <name name-style="western">
              <surname>Castells</surname>
              <given-names>Xavier</given-names>
            </name>
          </person-group>
          <article-title>Personalized breast cancer screening strategies: A systematic review and quality assessment</article-title>
          <source>PLoS One</source>
          <year>2019</year>
          <month>12</month>
          <day>16</day>
          <volume>14</volume>
          <issue>12</issue>
          <fpage>e0226352</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0226352"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0226352</pub-id>
          <pub-id pub-id-type="medline">31841563</pub-id>
          <pub-id pub-id-type="pii">PONE-D-19-12259</pub-id>
          <pub-id pub-id-type="pmcid">PMC6913984</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seong</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Khang</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Do</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Jeong</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Cohort profile: the National Health Insurance Service-National Health Screening Cohort (NHIS-HEALS) in Korea</article-title>
          <source>BMJ Open</source>
          <year>2017</year>
          <month>09</month>
          <day>24</day>
          <volume>7</volume>
          <issue>9</issue>
          <fpage>e016640</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=28947447"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2017-016640</pub-id>
          <pub-id pub-id-type="medline">28947447</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2017-016640</pub-id>
          <pub-id pub-id-type="pmcid">PMC5623538</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Learning from experts: developing transferable deep features for patient-level lung cancer prediction</article-title>
          <year>2016</year>
          <conf-name>International Conference on Medical Image Computing and Computer-Assisted Intervention</conf-name>
          <conf-date>17-21 Oct 2016</conf-date>
          <conf-loc>Athens</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-46723-8_15</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Thirty years of national health insurance in South Korea: lessons for achieving universal health care coverage</article-title>
          <source>Health Policy Plan</source>
          <year>2009</year>
          <month>01</month>
          <day>12</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>63</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1093/heapol/czn037</pub-id>
          <pub-id pub-id-type="medline">19004861</pub-id>
          <pub-id pub-id-type="pii">czn037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Yong Ho</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Kyungdo</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>Seung Hyun</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>Kyung Soo</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Ki Up</given-names>
            </name>
            <collab>Taskforce Team of Diabetes Fact Sheet of the Korean Diabetes Association</collab>
          </person-group>
          <article-title>Data analytic process of a nationwide population-based study using national health information database established by national health insurance service</article-title>
          <source>Diabetes Metab J</source>
          <year>2016</year>
          <month>02</month>
          <volume>40</volume>
          <issue>1</issue>
          <fpage>79</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://e-dmj.org/DOIx.php?id=10.4093/dmj.2016.40.1.79"/>
          </comment>
          <pub-id pub-id-type="doi">10.4093/dmj.2016.40.1.79</pub-id>
          <pub-id pub-id-type="medline">26912157</pub-id>
          <pub-id pub-id-type="pii">40.79</pub-id>
          <pub-id pub-id-type="pmcid">PMC4768054</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>Seri</given-names>
            </name>
            <name name-style="western">
              <surname>Won</surname>
              <given-names>Young-Joo</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Young Ran</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>Kyu-Won</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>Hyun-Joo</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Eun Sook</given-names>
            </name>
            <collab>Community of Population-Based Regional Cancer Registries</collab>
          </person-group>
          <article-title>Cancer statistics in Korea: incidence, mortality, survival, and prevalence in 2017</article-title>
          <source>Cancer Res Treat</source>
          <year>2020</year>
          <month>04</month>
          <volume>52</volume>
          <issue>2</issue>
          <fpage>335</fpage>
          <lpage>350</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.4143/crt.2020.206"/>
          </comment>
          <pub-id pub-id-type="doi">10.4143/crt.2020.206</pub-id>
          <pub-id pub-id-type="medline">32178489</pub-id>
          <pub-id pub-id-type="pii">crt.2020.206</pub-id>
          <pub-id pub-id-type="pmcid">PMC7176962</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>NHI program</article-title>
          <source>h-well NHIS</source>
          <access-date>2021-04-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nhis.or.kr/static/html/wbd/g/a/wbdga0405.html">https://www.nhis.or.kr/static/html/wbd/g/a/wbdga0405.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <article-title>Statistics Korea news</article-title>
          <source>Statistics Korea</source>
          <access-date>2021-04-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://kostat.go.kr/portal/eng/news/3/index.board?bmode=read&#38;aSeq=71706">http://kostat.go.kr/portal/eng/news/3/index.board?bmode=read&#38;aSeq=71706</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruff</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Vandermeulen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Goernitz</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Deecke</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Siddiqui</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Binder</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kloft</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Deep one-class classification</article-title>
          <year>2018</year>
          <conf-name>The 35th International Conference on Machine Learning</conf-name>
          <conf-date>10-15 July 2018</conf-date>
          <conf-loc>Stockholm</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lundberg</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Su-In</given-names>
            </name>
          </person-group>
          <article-title>A unified approach to interpreting model predictions</article-title>
          <source>arXiv</source>
          <access-date>2021-04-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1705.07874">http://arxiv.org/abs/1705.07874</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Michikawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Inoue</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sawada</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Iwasaki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shimazu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sasazuki</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yamaji</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mizokami</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tsugane</surname>
              <given-names>S</given-names>
            </name>
            <collab>Japan Public Health Center-based Prospective Study Group</collab>
          </person-group>
          <article-title>Development of a prediction model for 10-year risk of hepatocellular carcinoma in middle-aged Japanese: the Japan Public Health Center-based Prospective Study Cohort II</article-title>
          <source>Prev Med</source>
          <year>2012</year>
          <month>08</month>
          <volume>55</volume>
          <issue>2</issue>
          <fpage>137</fpage>
          <lpage>143</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ypmed.2012.05.017</pub-id>
          <pub-id pub-id-type="medline">22676909</pub-id>
          <pub-id pub-id-type="pii">S0091-7435(12)00217-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Joo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Nam</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Risk prediction model for colorectal cancer: National Health Insurance Corporation study, Korea</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <month>02</month>
          <day>12</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>e88079</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0088079"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0088079</pub-id>
          <pub-id pub-id-type="medline">24533067</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-28997</pub-id>
          <pub-id pub-id-type="pmcid">PMC3922771</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tammemägi</surname>
              <given-names>Martin C</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Haaf</surname>
              <given-names>Kevin</given-names>
            </name>
            <name name-style="western">
              <surname>Toumazis</surname>
              <given-names>Iakovos</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>Chung Yin</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Summer S</given-names>
            </name>
            <name name-style="western">
              <surname>Jeon</surname>
              <given-names>Jihyoun</given-names>
            </name>
            <name name-style="western">
              <surname>Commins</surname>
              <given-names>John</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>Thomas</given-names>
            </name>
            <name name-style="western">
              <surname>Meza</surname>
              <given-names>Rafael</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of a multivariable lung cancer risk prediction model that includes low-dose computed tomography screening results: a secondary analysis of data from the national lung screening trial</article-title>
          <source>JAMA Netw Open</source>
          <year>2019</year>
          <month>03</month>
          <day>01</day>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>e190204</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2019.0204"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.0204</pub-id>
          <pub-id pub-id-type="medline">30821827</pub-id>
          <pub-id pub-id-type="pii">2726714</pub-id>
          <pub-id pub-id-type="pmcid">PMC6484623</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Steinhubl</surname>
              <given-names>Steven R</given-names>
            </name>
            <name name-style="western">
              <surname>deFilippi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dey</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Early detection of heart failure using electronic health records: practical implications for time before diagnosis, data diversity, data quantity, and data density</article-title>
          <source>Circ Cardiovasc Qual Outcomes</source>
          <year>2016</year>
          <month>11</month>
          <volume>9</volume>
          <issue>6</issue>
          <fpage>649</fpage>
          <lpage>658</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28263940"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/CIRCOUTCOMES.116.002797</pub-id>
          <pub-id pub-id-type="medline">28263940</pub-id>
          <pub-id pub-id-type="pii">CIRCOUTCOMES.116.002797</pub-id>
          <pub-id pub-id-type="pmcid">PMC5341145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Keshavjee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Guergachi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Predictive models for diabetes mellitus using machine learning techniques</article-title>
          <source>BMC Endocr Disord</source>
          <year>2019</year>
          <month>10</month>
          <day>15</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>101</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcendocrdisord.biomedcentral.com/articles/10.1186/s12902-019-0436-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12902-019-0436-6</pub-id>
          <pub-id pub-id-type="medline">31615566</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12902-019-0436-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6794897</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Badholia</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Predictive modelling and analytics for diabetes using a machine learning approach</article-title>
          <source>ITII</source>
          <year>2021</year>
          <month>02</month>
          <day>28</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>215</fpage>
          <lpage>223</lpage>
          <pub-id pub-id-type="doi">10.17762/itii.v9i1.121</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Daanouni</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Cherradi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tmiri</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Type 2 diabetes mellitus prediction model based on machine learning approach</article-title>
          <year>2019</year>
          <conf-name>Fourth International Conference on Smart City Applications (SCA2019)</conf-name>
          <conf-date>2-4 October 2019</conf-date>
          <conf-loc>Casablanca, Morocco</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-37629-1_33</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kanegae</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Suzuki</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Fukatani</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ito</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Harada</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kario</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Highly precise risk prediction model for new-onset hypertension using artificial intelligence techniques</article-title>
          <source>J Clin Hypertens (Greenwich)</source>
          <year>2020</year>
          <month>03</month>
          <day>09</day>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>445</fpage>
          <lpage>450</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1111/jch.13759"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jch.13759</pub-id>
          <pub-id pub-id-type="medline">31816148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elshawi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Mallah</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Sakr</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>On the interpretability of machine learning-based model for predicting hypertension</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>07</month>
          <day>29</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>146</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0874-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0874-0</pub-id>
          <pub-id pub-id-type="medline">31357998</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0874-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6664803</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sihto</surname>
              <given-names>Harri</given-names>
            </name>
            <name name-style="western">
              <surname>Lundin</surname>
              <given-names>Johan</given-names>
            </name>
            <name name-style="western">
              <surname>Lundin</surname>
              <given-names>Mikael</given-names>
            </name>
            <name name-style="western">
              <surname>Lehtimäki</surname>
              <given-names>Tiina</given-names>
            </name>
            <name name-style="western">
              <surname>Ristimäki</surname>
              <given-names>Ari</given-names>
            </name>
            <name name-style="western">
              <surname>Holli</surname>
              <given-names>Kaija</given-names>
            </name>
            <name name-style="western">
              <surname>Sailas</surname>
              <given-names>Liisa</given-names>
            </name>
            <name name-style="western">
              <surname>Kataja</surname>
              <given-names>Vesa</given-names>
            </name>
            <name name-style="western">
              <surname>Turpeenniemi-Hujanen</surname>
              <given-names>Taina</given-names>
            </name>
            <name name-style="western">
              <surname>Isola</surname>
              <given-names>Jorma</given-names>
            </name>
            <name name-style="western">
              <surname>Heikkilä</surname>
              <given-names>Päivi</given-names>
            </name>
            <name name-style="western">
              <surname>Joensuu</surname>
              <given-names>Heikki</given-names>
            </name>
          </person-group>
          <article-title>Breast cancer biological subtypes and protein expression predict for the preferential distant metastasis sites: a nationwide cohort study</article-title>
          <source>Breast Cancer Res</source>
          <year>2011</year>
          <month>09</month>
          <day>13</day>
          <volume>13</volume>
          <issue>5</issue>
          <fpage>R87</fpage>
          <pub-id pub-id-type="doi">10.1186/bcr2944</pub-id>
          <pub-id pub-id-type="medline">21914172</pub-id>
          <pub-id pub-id-type="pii">bcr2944</pub-id>
          <pub-id pub-id-type="pmcid">PMC3262199</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>KN</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <collab>Taiwan Gastrointestinal Disease Helicobacter Consortium</collab>
          </person-group>
          <article-title>A tool to predict risk for gastric cancer in patients with peptic ulcer disease on the basis of a nationwide cohort</article-title>
          <source>Clin Gastroenterol Hepatol</source>
          <year>2015</year>
          <month>02</month>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>287</fpage>
          <lpage>293.e1</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cgh.2014.07.043</pub-id>
          <pub-id pub-id-type="medline">25083561</pub-id>
          <pub-id pub-id-type="pii">S1542-3565(14)01088-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zelic</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Garmo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zugna</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Stattin</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Richiardi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Akre</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Pettersson</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Corrigendum re "Predicting prostate cancer death with different pretreatment risk stratification tools: a head-to-head comparison in a nationwide cohort study" [Eur Urol 2020;77:180-8]</article-title>
          <source>Eur Urol</source>
          <year>2020</year>
          <month>07</month>
          <volume>78</volume>
          <issue>1</issue>
          <fpage>e45</fpage>
          <lpage>e47</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eururo.2020.03.016</pub-id>
          <pub-id pub-id-type="medline">32386780</pub-id>
          <pub-id pub-id-type="pii">S0302-2838(20)30191-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ali Khan</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Fallah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sundquist</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sundquist</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brenner</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kharazmi</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Risk of colorectal cancer in patients with diabetes mellitus: A Swedish nationwide cohort study</article-title>
          <source>PLoS Med</source>
          <year>2020</year>
          <month>11</month>
          <day>13</day>
          <volume>17</volume>
          <issue>11</issue>
          <fpage>e1003431</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1003431"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1003431</pub-id>
          <pub-id pub-id-type="medline">33186354</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-20-03051</pub-id>
          <pub-id pub-id-type="pmcid">PMC7665813</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
