<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v14i1e75862</article-id>
      <article-id pub-id-type="pmid">34898427</article-id>
      <article-id pub-id-type="doi">10.2196/75862</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Comparison of Feature Selection Methods in Machine Learning Models of Cancer Information Seeking Among United States Adults: Cross-Sectional Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Coristine</surname>
            <given-names>Andrew</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Liu</surname>
            <given-names>Xin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mohanadas</surname>
            <given-names>Sadhasivam</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Alemede</surname>
            <given-names>Vincent</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Obisesan Olawuni</surname>
            <given-names>Kudirat Abidemi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Potla</surname>
            <given-names>Ravi Teja</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Patel</surname>
            <given-names>Mayank Bharatkumar</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chowdhury</surname>
            <given-names>Shaika</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Ying</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biostatistics and Epidemiology, College of Public Health</institution>
            <institution>East Tennessee State University</institution>
            <addr-line>P.O. Box 70259</addr-line>
            <addr-line>1276 Gilbreath Dr.</addr-line>
            <addr-line>Johnson City, TN, 37601</addr-line>
            <country>United States</country>
            <phone>1 4234396662</phone>
            <email>liuy09@etsu.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4602-8992</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Kesheng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7118-3877</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biostatistics and Epidemiology, College of Public Health</institution>
        <institution>East Tennessee State University</institution>
        <addr-line>Johnson City, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Cancer Survivorship Research Center, College of Nursing</institution>
        <institution>University of South Carolina</institution>
        <addr-line>Columbia, SC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Epidemiology and Biostatistics, Arnold School of Public Health</institution>
        <institution>University of South Carolina</institution>
        <addr-line>Columbia, SC</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Ying Liu <email>liuy09@etsu.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>4</month>
        <year>2026</year>
      </pub-date>
      <volume>14</volume>
      <elocation-id>e75862</elocation-id>
      <history>
        <date date-type="received">
          <day>11</day>
          <month>4</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>2</day>
          <month>6</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>9</day>
          <month>3</month>
          <year>2026</year>
        </date>
        <date date-type="accepted">
          <day>12</day>
          <month>3</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Ying Liu, Kesheng Wang. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 20.04.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2026/1/e75862" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Feature selection is the process of identifying the most informative and relevant features from a larger set of candidate features in machine learning (ML) models. The Boruta algorithm and the least absolute shrinkage and selection operator (LASSO) are 2 widely used methods.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to (1) compare several feature-selection strategies, including Boruta, LASSO, their intersection, principal component analysis (PCA), and a no–feature-selection baseline, and (2) evaluate ML models to predict cancer information–seeking behavior among US adults.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Data from 5505 individuals (2630 cancer information seekers and 2875 nonseekers) were selected from the 2022 Health Information National Trends Survey. The Boruta algorithm, LASSO, and PCA were used to perform feature selection of 73 variables. Five ML tools (the support vector machine algorithms, logistic regression [LR], random forest [RF], k-nearest neighbor, and extreme gradient boosting) were applied to develop ML models to predict cancer information–seeking. The area under the receiver operating characteristic curve (AUC) and the DeLong test were used to evaluate and compare the performance of the models. Stepwise LR analysis was performed to estimate the odds ratios and their 95% CIs for the associations of potential variables selected in ML analyses with the outcome.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Overall, 47.8% (2630/5505) of respondents reported seeking cancer information (949/2189, 43.4% of men; 1681/3316, 50.7% of women). RF achieved the highest AUC (0.781) and second-highest accuracy (0.714) using LASSO-selected variables, while the support vector machine with linear kernel and LR models using all 73 features yielded the highest accuracy (0.717). Notably, RF produced comparable AUCs when using Boruta-only features, LASSO-only features, or no feature selection (all 73 features); these AUCs were significantly higher than those derived from PCA components or from the 20 PCA-loading–based variables. Stepwise LR confirmed that 19 of the 27 shared variables selected by both Boruta and LASSO were independently associated with information seeking (<italic>P</italic>&lt;.05). The top predictors included a personal history of cancer, greater worry about developing cancer, a family history of cancer, non-Hispanic White race, higher household income, awareness of genetic testing, viewing health-related videos on social media, interest in cancer screening, being offered access to an online medical record, and knowledge of human papillomavirus.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Boruta and LASSO demonstrated strong and consistent performance in feature selection for predicting cancer information seeking, whereas PCA provided a dimension-reduced yet less predictive alternative. Findings offer actionable insights for tailoring public health communication strategies and improving engagement in cancer information resources among US adults.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>beliefs</kwd>
        <kwd>cancer information seeking</kwd>
        <kwd>data mining</kwd>
        <kwd>feature selection</kwd>
        <kwd>health behaviors</kwd>
        <kwd>knowledge</kwd>
        <kwd>machine learning</kwd>
        <kwd>PCA</kwd>
        <kwd>random forest</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>With the development of science and technology, people seek health information from various sources, including online and offline approaches, for different reasons, such as to understand personal or family members’ health conditions and to make decisions on health professionals’ recommendations [<xref ref-type="bibr" rid="ref1">1</xref>]. According to the United States National Cancer Institute (NCI) report, approximately 40.5% of people will have cancer in their lifetime based on 2017-2019 data. Over 2 million people were diagnosed in the United States in 2024 [<xref ref-type="bibr" rid="ref2">2</xref>]. Patients often actively seek cancer information to better understand their diagnosis, treatment options, and home care. Their family members frequently join in the search for additional information. Healthy people may seek cancer information to enlarge their knowledge and improve their quality of life, such as changing their diet to healthy food and having regular physical exams [<xref ref-type="bibr" rid="ref3">3</xref>]. Generally, cancer information–seeking behavior may increase knowledge, preventive behaviors, and screening behaviors; moreover, cancer information seekers may be more likely to adopt healthy lifestyle behaviors and get screened for cancer [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>Several sociodemographic factors, such as being female, aged 55-64 years vs 40-44 years, having higher education, identifying as Black or Hispanic, and being married, have been positively associated with cancer information seeking in the US population. Racial disparities and variations by marital status and cancer status have also been reported [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Furthermore, behaviors such as alcohol use and tobacco smoking, as well as certain chronic conditions (eg, cancer and anxiety), may influence cancer information seeking [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. Beliefs about cancer and knowledge of genetic testing have also been associated with cancer information–seeking behavior, though findings in these areas remain inconsistent [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      <p>Machine learning (ML) and predictive analytics are commonly used in many areas, and they can transform data into useful insights for better understanding and faster decision-making. ML methods can address high-dimensional data, model the etiological and clinical heterogeneity, and translate univariate variable findings into clinically useful multivariate decision support systems [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. Feature selection is a critical step in ML, not only to reduce the dimensionality of the feature space, but also to reveal the most relevant features without losing too much information [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>]. Feature selection, as a preprocessing stage, is essentially the process of picking some informative and relevant features from a larger collection of features that produce a better characterization of patterns of multiple classes [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Several feature-selection methods have been used in ML, such as the least absolute shrinkage and selection operator (LASSO) [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref35">35</xref>] and the Boruta algorithm [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. Principal component analysis (PCA) is an unsupervised feature reduction technique that explains the variance-covariance structure of a set of variables through linear combinations known as principal components (PCs) or factors. PCA has been used to reduce the dimensions, but it is not a feature-selection method because all variables remain in each factor [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>].</p>
      <p>Numerous studies have investigated cancer information–seeking behaviors [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. However, relatively limited research has applied ML approaches to systematically identify key factors associated with cancer information–seeking behaviors. The Health Information National Trends Survey (HINTS) conducted by the NCI is a nationally representative cross-sectional survey of civilian noninstitutionalized adults aged 18 years and older in the United States. HINTS collected comprehensive data on the access to, use of, and needs for health- and cancer-related information, as well as knowledge, perceptions, attitudes, and related health behaviors. It has been widely used to address cancer information seeking [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. Several ML tools, such as logistic regression (LR), support vector machine (SVM), random forest (RF), k-nearest neighbor (KNN), and extreme gradient boosting (XGBoost), have been used in the classification of binge drinking, e-cigarette use, and severe psychological distress using HINTS data [<xref ref-type="bibr" rid="ref40">40</xref>-<xref ref-type="bibr" rid="ref43">43</xref>]. Therefore, this study aimed to (1) compare feature-selection methods—Boruta, LASSO, combination of Boruta and LASSO, and PCA-based methods—and (2) develop ML tools to predict cancer information seeking among US adults using the data from the 2022 Health Information National Trends Survey (HINTS 6).</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Sample</title>
        <p>The data for this study were selected from the HINTS 6, which included 6252 respondents. The HINTS is a nationally representative survey administered by the NCI since 2003. The HINTS targets adults aged 18 years or older in the civilian noninstitutionalized population of the United States. The HINTS, sponsored by the NCI, provides a unique opportunity to explore the characteristics of information seekers and nonseekers, as well as the content of information being sought by the public in a nationally representative sample. Data collection for HINTS 6 started on March 7, 2022, and concluded on November 8, 2022. The overall household response rate, based on the next-birthday method, was 28.1%.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The original HINTS 6 data collection by the NCI was designated “exempt research” under 45 CFR 46.104 and approved by the Westat Institutional Review Board on May 10, 2021 (project #6632.03.51), with a subsequent amendment approved on November 24, 2021 (amendment ID #3597). This study is a secondary analysis of HINTS 6, a publicly available, deidentified dataset. In accordance with US federal regulations (45 CFR 46) and institutional policies, secondary analyses of deidentified, publicly available data do not require additional institutional review board review. Additional details about the informed consent process, incentives, and methodology can be found in the HINTS 6 methodology report [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
      </sec>
      <sec>
        <title>Outcome Variable</title>
        <p>Individuals were classified as cancer information seekers if they responded “yes” to the question “Have you ever looked for information about cancer from any source?” and those who responded “no” were classified as nonseekers.</p>
      </sec>
      <sec>
        <title>Data Processing of Predictors</title>
        <p>A total of 87 predictive variables (including demographic factors, alcohol and tobacco use, health care, medical record, chronic diseases, beliefs about cancer, social media, health and nutrition, etc) were included in the initial analysis. Previous simulation and real data analyses revealed that statistical analysis is likely to be biased if the percentage of missing values is more than 10% [<xref ref-type="bibr" rid="ref45">45</xref>-<xref ref-type="bibr" rid="ref47">47</xref>]. Therefore, variables with a missing value rate higher than 10% were removed from further analysis. Finally, 74 variables, including the outcome, were left. After excluding individuals with missing data on the outcome, age, gender, or race, the final sample size was 5505.</p>
        <p>Demographic characteristics included gender, age group (18-49 years, 50-64 years, 65-74 years, and 75 years or older), race, education, full-time work (yes or no), income, and health insurance (yes or no). Race was recoded as Hispanic, non-Hispanic White, non-Hispanic Black or African American, non-Hispanic Asian, and other. Education had 4 categories (less than high school, some college, bachelor’s degree, and postbaccalaureate degree). The 4 categories of annual income were &lt;US $19,999, US $20,000-US $49,999, US $50,000-US $74,999, and ≥US $75,000. <xref ref-type="table" rid="table1">Table 1</xref> lists the demographic variables.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Prevalence of cancer information seeking across demographic factors.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="350"/>
            <col width="140"/>
            <col width="170"/>
            <col width="200"/>
            <col width="110"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Variable</td>
                <td>Total, n</td>
                <td>Seeking, n</td>
                <td>Prevalence, %<sup>a</sup></td>
                <td><italic>P</italic> value<sup>b</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">
                  <bold>Gender</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>2189</td>
                <td>949</td>
                <td>43.4</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>3316</td>
                <td>1681</td>
                <td>50.7</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Age group</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>18-49 years</td>
                <td>1935</td>
                <td>895</td>
                <td>46.3</td>
                <td>.01</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>50-64 years</td>
                <td>1607</td>
                <td>791</td>
                <td>49.2</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>65-74 years</td>
                <td>1234</td>
                <td>623</td>
                <td>50.5</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥75 years</td>
                <td>729</td>
                <td>321</td>
                <td>44.0</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Race</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic White</td>
                <td>3175</td>
                <td>1738</td>
                <td>54.7</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic African American</td>
                <td>878</td>
                <td>314</td>
                <td>35.8</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic</td>
                <td>984</td>
                <td>371</td>
                <td>37.7</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Asian</td>
                <td>286</td>
                <td>120</td>
                <td>42.0</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td>182</td>
                <td>87</td>
                <td>47.8</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Education<sup>c</sup></bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Less than high school</td>
                <td>1300</td>
                <td>375</td>
                <td>28.8</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Some college</td>
                <td>1574</td>
                <td>721</td>
                <td>45.8</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bachelor’s degree</td>
                <td>1550</td>
                <td>843</td>
                <td>54.4</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Postbaccalaureate degree</td>
                <td>1070</td>
                <td>687</td>
                <td>64.2</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Income (US $)</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&lt;19,999</td>
                <td>904</td>
                <td>271</td>
                <td>30.0</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>20,000-49,999</td>
                <td>1431</td>
                <td>569</td>
                <td>39.8</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>50,000-74,999</td>
                <td>946</td>
                <td>488</td>
                <td>51.6</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥75,000</td>
                <td>2224</td>
                <td>1302</td>
                <td>58.5</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overall</td>
                <td>5505</td>
                <td>2630</td>
                <td>47.8</td>
                <td>
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Prevalence is the number of cancer information seekers divided by the total number of respondents in the category, expressed as a percentage.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup><italic>P</italic> value is based on the chi-square test.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>11 participants had missing values in the education variable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Race was converted into dummy variables. Other predictive variables were binary, ordinal, or continuous. In this dataset, 70% (3854/5505) of the entries were used for training the models, leaving the remaining 30% (1651/5505) for the testing set. The missing values were imputed using the “knnImpute” method in caret based on the training data only [<xref ref-type="bibr" rid="ref48">48</xref>]. The derived imputation values were subsequently applied to the test data. The full list of 73 predictor variables is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows an overview of the data curation and ML process.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of data curation and machine learning workflow. LASSO: least absolute shrinkage and selection operator; PCA: principal component analysis.</p>
          </caption>
          <graphic xlink:href="medinform_v14i1e75862_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Feature-Selection Methods</title>
        <p>Feature selection was performed within the training data. The Boruta algorithm, implemented in R software (version 4.5.2; R Foundation for Statistical Computing) via the “Boruta” package, automatically performs feature selection on a dataset using an RF classifier [<xref ref-type="bibr" rid="ref49">49</xref>]. The LASSO feature selection was applied using the “glmnet” package in R software [<xref ref-type="bibr" rid="ref50">50</xref>]. This method regularizes the model by applying a penalty (λ), shrinking the regression coefficients, and reducing some of them to 0. The feature-selection phase occurs after the shrinkage, where non-0 values are selected as model parameters.</p>
        <p>The PCA is a dimensionality reduction technique that transforms a set of correlated variables into a smaller set of uncorrelated variables called PCs. Generally, the first PC will be the linear combination of the variables that captures the maximum amount of information in the data and will be correlated with at least some of the observed variables, while the second PC identified accounts for the second-largest amount of variance in the data and is uncorrelated with the first PC, and so on. Eigenvalues indicate the amount of variance explained by each PC. A scree plot was used to visualize eigenvalues. The eigenvalue-one criterion (eigenvalue ≥1) is commonly used to decide how many PCs to retain. Eigenvectors are the weights used to calculate PC scores. The PC score is a linear combination of observed variables weighted by eigenvectors. Assume there are n individuals and k observed predictors included in the PCA, then there are k PCs in total. The equation of the PC score for each individual can be written as:</p>
        <disp-formula>PC<sub>ij</sub> = b<sub>1j</sub>x<sub>i1</sub> + b<sub>2j</sub>x<sub>i2</sub> + ... + b<sub>qj</sub>x<sub>iq</sub></disp-formula>
        <p>where</p>
        <disp-formula>PC<sub>ij</sub> = the jth PC score for the ith individual, i = 1, 2, ..., n; j = 1, 2, ..., k</disp-formula>
        <disp-formula>b<sub>kj</sub> = the regression coefficient for observed variable k in the jth PC</disp-formula>
        <disp-formula>x<sub>ik</sub> = the value of individual i on the observed variable k</disp-formula>
        <disp-formula>q = the number of PCs, q = 1, 2, ..., k</disp-formula>
        <p>In PCA, the factor loading of a variable represents the correlation between the original variable and a given PC. The loading indicates how much each original variable contributes to a specific PC. Large absolute values of loadings indicate that the corresponding variable has a strong relationship with that particular PC. A factor loading of a variable is considered large if its absolute value exceeds 50%. The PCA was performed using SPSS software (version 31; IBM Corp). For ML analysis, the PC scores for each individual were initially used as predictors. Furthermore, from each of the uncorrelated PCs, 1 variable with the highest loading/correlation coefficient with the PC was chosen for further ML analysis.</p>
      </sec>
      <sec>
        <title>ML Methods</title>
        <p>A total of 5 ML algorithms were used, including LR, SVM, RF, KNN, and XGBoost. The caret package, incorporating other packages in R, was used for LR, KNN, RF, SVM, and XGBoost [<xref ref-type="bibr" rid="ref48">48</xref>]. A 10-fold cross-validation approach was applied, and multiple parameters for each algorithm were optimized using a grid search.</p>
        <p>For the LR model, the “glmnet” in the caret package was used. In the grid search, we set alpha = 0:1 and lambda = seq(0.001, 1, length = 10).</p>
        <p>The SVM algorithm includes linear kernel and radial kernel [<xref ref-type="bibr" rid="ref51">51</xref>]. In the grid search, we set C = c(0.01, 0.1, 0.2, 0.5, 1, 2) for the linear kernel; sigma = c(0.05, 0.25, 0.5, 1, 2) and C = c(0.05, 0.25, 0.5, 1, 2) for the radial kernel.</p>
        <p>The RF algorithm randomly selects a subset of variables to construct multiple decision trees (DTs) [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. In the grid search, we set mtry = c(1:15) and ntree = 300, where the mtry parameter refers to the number of variables used in each random tree, while ntree refers to the number of trees that the forest contains. The mtry range (1-15) was chosen to cover and exceed the conventional default (√p ≈ 8-9) while limiting overfitting and computational burden.</p>
        <p>KNN, a simple ML algorithm based on a clustering algorithm with supervised learning, calculates the average of the numerical target of the k-nearest neighbors [<xref ref-type="bibr" rid="ref54">54</xref>]. KNN is more suitable for low-dimensional data with a small number of input variables. In the grid search, we set k=1:20.</p>
        <p>XGBoost [<xref ref-type="bibr" rid="ref55">55</xref>] is a supervised ML method for regression and classification tasks similar to the RF classifier. In the grid search, we set nrounds = c(200, 300), max_depth = c(6, 10, 20), colsample_bytree = c(0.5, 1.0), eta = c(0.1, 0.3), gamma = c(0, 0.5), min_child_weight = c(1, 2), and subsample = c(0.75, 1.0).</p>
      </sec>
      <sec>
        <title>Performance of ML</title>
        <p>To evaluate the performance of feature-selection methods, we used several metrics, including accuracy, recall (sensitivity), specificity, precision (positive predictive value), <italic>F</italic><sub>1</sub>-score, and area under the receiver operating characteristic curve (AUC). The R packages used included “caret,” “kernlab,” and “ROCR.”</p>
        <graphic xlink:href="medinform_v14i1e75862_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <graphic xlink:href="medinform_v14i1e75862_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <graphic xlink:href="medinform_v14i1e75862_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <graphic xlink:href="medinform_v14i1e75862_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <graphic xlink:href="medinform_v14i1e75862_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>where <italic>TP</italic> is the number of true positives, <italic>TN</italic> is the number of true negatives, <italic>FP</italic> is the number of false positives, and <italic>FN</italic> is the number of false negatives. The <italic>F</italic><sub>1</sub>-score is a harmonic mean that combines both recall and precision. The DeLong test was used to compare the statistical differences of the AUC between different models [<xref ref-type="bibr" rid="ref56">56</xref>].</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>Categorical variables were presented as raw counts along with their proportions. The chi-square test was used to examine the associations of categorical variables with cancer information seeking across demographic variables. Stepwise LR analysis was performed to estimate the odds ratios (ORs) and their 95% CIs for the associations of potential factors selected in ML analyses with the outcome. All statistical analyses were performed using SPSS software (version 31).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Prevalence of Cancer Information Seeking</title>
        <p>Among the 5505 adult respondents, 2630 were classified as cancer information seekers and 2875 as nonseekers (<xref ref-type="table" rid="table1">Table 1</xref>). The overall prevalence was 47.8% (2630/5505; 949/2189, 43.4% for men and 1681/3316, 50.7% for women). The prevalence increased with age (895/1935, 46.3%; 791/1607, 49.2%; 623/1234, 50.5% for age groups 18-49, 50-64, and 65-74 years, respectively). The age group of ≥75 years had a lower prevalence (321/729, 44%). The prevalence was higher in those with higher education and higher income.</p>
      </sec>
      <sec>
        <title>Feature Selection</title>
        <p>The Boruta algorithm selected 43 variables, and LASSO selected 42 variables related to cancer information seeking (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Of these, 27 variables were identified by both methods. The PCA identified 20 uncorrelated PCs with eigenvalues &gt;1 (<xref rid="figure2" ref-type="fig">Figure 2</xref> and <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). From each PC, we selected the variable with the highest loading/correlation (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Scree plot showing 20 principal components with eigenvalues higher than 1.</p>
          </caption>
          <graphic xlink:href="medinform_v14i1e75862_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>ML Performance</title>
        <p>The performance statistics are summarized in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>. RF achieved the highest AUC (0.781) and second-highest accuracy (0.714) using 42 LASSO-selected variables, while using all 73 features yielded the highest accuracy (0.717). Using the 27 common variables identified by both Boruta and LASSO, the RF model achieved the highest predictive accuracy (0.711), the same as using SVM with a linear kernel, closely followed by SVM with a radial basis function (RBF) kernel (0.708) and LR (0.708). When using 20 PC scores and selecting 1 variable from each of 20 PCs with the highest loading with the PC, the RF models showed lower accuracy (0.694 and 0.678, respectively). In <xref rid="figure3" ref-type="fig">Figure 3</xref>, the mean decrease in accuracy and the mean decrease in Gini metrics from the RF algorithm are shown for the 27 variables identified by LASSO and Boruta. <xref rid="figure4" ref-type="fig">Figure 4</xref> displays the same metrics for the 20 variables derived from each PC. Based on the Gini values in <xref rid="figure3" ref-type="fig">Figure 3</xref>, the strongest predictors included cancer-related worry, income, a family history of cancer, interest in cancer screening, meaning and purpose in life, education, more frequent provider visits, having a cancer diagnosis, being offered access to an online medical record, watching health-related videos on social media, and awareness of genetic testing. Plot of mean decrease accuracy and mean decrease Gini values using RF algorithm, XGBoost, gradient boosting machine, LR models, and the 21 factors are illustrated in Figures S1-S4 in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Plot of mean decrease accuracy (left panel) and mean decrease Gini (right panel) values using the random forest algorithm and 27 variables selected by both Boruta and least absolute shrinkage and selection operator.</p>
          </caption>
          <graphic xlink:href="medinform_v14i1e75862_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Plot of mean decrease accuracy (left panel) and mean decrease Gini (right panel) values using the random forest algorithm and 20 variables with the highest loading from 20 PCs.</p>
          </caption>
          <graphic xlink:href="medinform_v14i1e75862_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>To determine whether the RF model had a significantly higher AUC than other models, the DeLong test was used. The AUC differences between RF and other models, 95% CIs of the AUC difference, and <italic>P</italic> values are illustrated in <xref ref-type="table" rid="table2">Table 2</xref>. The RF model yielded comparable AUC values with SVM with a linear kernel and LR, except for using PCA-based 20 variables (<italic>P</italic>&gt;.05). In contrast, the AUCs from these RF models were significantly higher than those obtained using SVM with an RBF kernel, KNN, and XGBoost (<italic>P</italic>&lt;.05), except for SVM with an RBF kernel using 20 PC scores and XGBoost using LASSO and PCA-based 20 variables. <xref rid="figure5" ref-type="fig">Figure 5</xref> presents the AUC in the test data for each ML model across the 6 feature-selection methods.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Comparison of machine learning models with random forest using the DeLong test of area under the receiver operating characteristic curve (AUC) difference.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="110"/>
            <col width="70"/>
            <col width="110"/>
            <col width="70"/>
            <col width="0"/>
            <col width="110"/>
            <col width="70"/>
            <col width="0"/>
            <col width="110"/>
            <col width="70"/>
            <col width="0"/>
            <col width="110"/>
            <col width="70"/>
            <thead>
              <tr valign="bottom">
                <td>Features</td>
                <td colspan="2">SVM<sup>a</sup>_Linear</td>
                <td colspan="3">SVM_RBF<sup>b</sup></td>
                <td colspan="3">LR<sup>c</sup></td>
                <td colspan="3">KNN<sup>d</sup></td>
                <td colspan="2">XGBoost<sup>e</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td colspan="2">AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td colspan="2">AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td colspan="2">AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>All features</td>
                <td>0.0007 (–0.0101 to 0.0115)</td>
                <td>.89</td>
                <td>0.0509 (0.0359 to 0.0659)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td colspan="2">–0.0012 (–0.0098 to 0.0074)</td>
                <td>.78</td>
                <td colspan="2">0.0464 (0.0306 to 0.0622)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td colspan="2">0.0109 (0.0015 to 0.0204)</td>
                <td>.02<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td>LASSO<sup>g</sup></td>
                <td>–0.0019 (–0.0113 to 0.0075)</td>
                <td>.69</td>
                <td>0.0117 (0.0014 to 0.0219)</td>
                <td>.03<sup>f</sup></td>
                <td colspan="2">–0.0044 (–0.0135 to 0.0047)</td>
                <td>.34</td>
                <td colspan="2">0.0288 (0.0156 to 0.0420)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td colspan="2">0.0057 (–0.0037 to 0.0152)</td>
                <td>.24</td>
              </tr>
              <tr valign="top">
                <td>Boruta</td>
                <td>0.0057 (–0.0037 to 0.0151)</td>
                <td>.23</td>
                <td>0.0165 (0.0054 to 0.0276)</td>
                <td>.004<sup>f</sup></td>
                <td colspan="2">0.0048 (–0.0045 to 0.0142)</td>
                <td>.31</td>
                <td colspan="2">0.0373 (0.0228 to 0.0519)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td colspan="2">0.0090 (0.0002 to 0.0179)</td>
                <td>.04<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td>LASSO and Boruta</td>
                <td>0.0005 (–0.0074 to 0.0084)</td>
                <td>.91</td>
                <td>0.0091 (0.0007 to 0.0174)</td>
                <td>.04<sup>f</sup></td>
                <td colspan="2">0.0005 (–0.0078 to 0.0088)</td>
                <td>.90</td>
                <td colspan="2">0.0256 (0.0132 to 0.0380)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td colspan="2">0.0182 (0.0089 to 0.0207)</td>
                <td>.001<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td>PCA<sup>h</sup> score</td>
                <td>–0.0082 (–0.0190 to 0.0026)</td>
                <td>.14</td>
                <td>0.0032 (–0.0078 to 0.0143)</td>
                <td>.57</td>
                <td colspan="2">–0.0089 (–0.0197 to 0.0019)</td>
                <td>.11</td>
                <td colspan="2">0.0438 (0.0278 to 0.0599)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td colspan="2">0.0278 (0.0117 to 0.0338)</td>
                <td>&lt;.001<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td>PCA (highest loading variable)</td>
                <td>0.0189 (0.0084 to 0.0293)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td>0.0238 (0.0133 to 0.0342)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td colspan="2">0.0173 (0.0070 to 0.0276)</td>
                <td>.001<sup>f</sup></td>
                <td colspan="2">0.0469 (0.0303 to 0.0635)</td>
                <td>&lt;.001<sup>f</sup></td>
                <td colspan="2">0.0105 (–0.0029 to 0.0238)</td>
                <td>.13</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>SVM: Support vector machine.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>RBF: Radial basis function.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>LR: Logistic regression.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>KNN: k-nearest neighbor.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>XGBoost: Extreme gradient boosting.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup><italic>P</italic>&lt;.05 based on DeLong test.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>LASSO: Least absolute shrinkage and selection operator.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>PCA: Principal component analysis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>AUC curves in the test data in comparison of RF with other five ML models. (a) All features, (b) LASSO, (c) Boruta, (d) LASSO and Boruta, (e) 20 PCA scores, (f) 20 highest loading variables from 20 PCs.</p>
          </caption>
          <graphic xlink:href="medinform_v14i1e75862_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>To evaluate whether differences exist among feature-selection methods, the RF model and the DeLong test were used. The pairwise AUC differences among 6 methods, 95% CIs of the AUC difference, and <italic>P</italic> values are illustrated in <xref ref-type="table" rid="table3">Table 3</xref>. Boruta, LASSO, and using all 73 features did not show significant differences (<italic>P</italic>&gt;.05), whereas these methods showed higher AUC than methods using PCA score and PCA-based selection of highest loading variables (<italic>P</italic>&lt;.05). The combined method of Boruta and LASSO did not show a significant difference from LASSO-only, whereas it showed lower AUC than methods using all 73 features and Boruta, but higher AUC than methods using PCA score and PCA-based selection of highest loading variables (<italic>P</italic>&lt;.05). Furthermore, using PCA scores had a higher AUC than using 20 PCA-based variables (<italic>P</italic>&lt;.05). The AUCs in the test data for the RF model across the 6 feature-selection methods are illustrated in <xref rid="figure6" ref-type="fig">Figure 6</xref>.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of feature-selection methods using random forest and the DeLong test of area under the receiver operating characteristic curve (AUC) difference.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="110"/>
            <col width="70"/>
            <col width="110"/>
            <col width="70"/>
            <col width="110"/>
            <col width="70"/>
            <col width="110"/>
            <col width="70"/>
            <col width="110"/>
            <col width="70"/>
            <thead>
              <tr valign="bottom">
                <td>Features</td>
                <td colspan="2">Boruta</td>
                <td colspan="2">LASSO<sup>a</sup></td>
                <td colspan="2">Boruta and LASSO</td>
                <td colspan="2">PCA<sup>b</sup> score</td>
                <td colspan="2">PCA (highest loading variable)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>AUC difference (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>All features</td>
                <td>0.0011 (–0.0035 to 0.0057)</td>
                <td>.64</td>
                <td>0.0054 (–0.00086 to 0.0116)</td>
                <td>.09</td>
                <td>0.0080 (0.0013 to 0.0146)</td>
                <td>.02<sup>c</sup></td>
                <td>0.0230 (0.0109 to 0.0351)</td>
                <td>&lt;.001<sup>c</sup></td>
                <td>0.0492 (0.0339 to 0.0644)</td>
                <td>&lt;.001<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>Boruta</td>
                <td>—<sup>d</sup></td>
                <td>—</td>
                <td>0.0043 (–0.0028 to 0.0114)</td>
                <td>.23</td>
                <td>0.0069 (0.0012 to 0.0125)</td>
                <td>.02<sup>c</sup></td>
                <td>0.0219 (0.0093 to 0.0345)</td>
                <td>&lt;.001<sup>c</sup></td>
                <td>0.0481 (0.0315 to 0.0646)</td>
                <td>&lt;.001<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>LASSO</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>0.0026 (–0.0028 to 0.0079)</td>
                <td>.35</td>
                <td>0.0176 (0.0050 to 0.0303)</td>
                <td>.006<sup>c</sup></td>
                <td>0.0438 (0.0274 to 0.0601)</td>
                <td>&lt;.001<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>LASSO and Boruta</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>0.0150 (0.0018 to 0.0283)</td>
                <td>.03<sup>c</sup></td>
                <td>0.0412 (0.0239 to 0.0583)</td>
                <td>&lt;.001<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>PCA score</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>0.0262 (0.0081 to 0.0442)</td>
                <td>.005<sup>c</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>LASSO: least absolute shrinkage and selection operator.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>PCA: principal component analysis.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup><italic>P</italic>&lt;.05 based on DeLong test.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Area under the receiver operating characteristic curve in the test data using the random forest model for feature-selection methods. LASSO: least absolute shrinkage and selection operator; PCA: principal component analysis.</p>
          </caption>
          <graphic xlink:href="medinform_v14i1e75862_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Stepwise LR Analysis</title>
        <p>For the stepwise LR, we used the single training dataset, in which the Boruta algorithm and LASSO selected 27 common variables for cancer information seeking. Stepwise LR of these 27 variables further confirmed that 19 variables were associated with cancer information seeking (<xref ref-type="table" rid="table4">Table 4</xref>). Based on ORs, the top predictors included having a cancer diagnosis (OR 1.46, 95% CI 1.35-1.59), greater worry about developing cancer (OR 1.41, 95% CI 1.31-1.53), a family history of cancer (OR 1.35, 95% CI 1.25-1.47), being White (OR 1.25, 95% CI 1.12-1.32), having a higher household income (OR 1.24, 95% CI 1.13-1.35), having heard of genetic testing (OR 1.24, 95% CI 1.14-1.35), watching health-related videos on social media (OR 1.24, 95% CI 1.15-1.34), interest in cancer screening (OR 1.20, 95% CI 1.11-1.29), being offered access to an online medical record (OR 1.17, 95% CI 1.08-1.27), and knowledge of human papillomavirus (HPV; OR 1.17, 95% CI 1.08-1.27).</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Stepwise logistic regression analyses of 27 variables.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="650"/>
            <col width="220"/>
            <col width="130"/>
            <thead>
              <tr valign="bottom">
                <td>Variable</td>
                <td>aOR<sup>a</sup> (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Race (White; reference=other races)</td>
                <td>1.25 (1.12-1.32)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Education (high school or above; reference=less than high school)</td>
                <td>1.16 (1.06-1.26)</td>
                <td>.001</td>
              </tr>
              <tr valign="top">
                <td>Income (1-4; 4=&gt;US $75,000)</td>
                <td>1.24 (1.13-1.35)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Alcohol_increase_cancer (1-4; 4=a lot)</td>
                <td>1.16 (1.08-1.26)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Confident_internet_health resource (1-5; 5=very confident)</td>
                <td>1.12 (1.03-1.21)</td>
                <td>.007</td>
              </tr>
              <tr valign="top">
                <td>Diabetes (yes; reference=no)</td>
                <td>0.91 (0.84-0.99)</td>
                <td>.02</td>
              </tr>
              <tr valign="top">
                <td>Cancer (yes; reference=no)</td>
                <td>1.46 (1.35-1.59)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Family had cancer (yes; reference=no)</td>
                <td>1.35 (1.25-1.47)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Heard_genetic_test (yes; reference=no)</td>
                <td>1.24 (1.14-1.35)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Heard_HPV (yes; reference=no)</td>
                <td>1.17 (1.08-1.27)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Doctor told colorectal cancer tests (yes; reference=never)</td>
                <td>1.12 (1.04-1.21)</td>
                <td>.004</td>
              </tr>
              <tr valign="top">
                <td>Interested in cancer screening (0-4; 4=very)</td>
                <td>1.20 (1.11-1.29)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Prevention of cancer not possible (1-4; 4=strongly agree)</td>
                <td>1.16 (1.07-1.25)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Worry_get_cancer (1-5; 5=extremely)</td>
                <td>1.41 (1.31-1.53)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Frequency_go_provider last year (1-6; 6=10 or more times)</td>
                <td>1.12 (1.04-1.21)</td>
                <td>.004</td>
              </tr>
              <tr valign="top">
                <td>Offer_access online medical record (yes; reference=no)</td>
                <td>1.17 (1.08-1.27)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Meaning in life (1-5; 5=a lot)</td>
                <td>1.09 (1.01-1.18)</td>
                <td>.03</td>
              </tr>
              <tr valign="top">
                <td>Social_media_misleading health information (1-5; 5=a lot)</td>
                <td>1.09 (1.01-1.17)</td>
                <td>.03</td>
              </tr>
              <tr valign="top">
                <td>Social_media_watch_video (1-5; 5=almost everyday)</td>
                <td>1.24 (1.15-1.34)</td>
                <td>&lt;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>aOR: adjusted odds ratio.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study compared 3 feature-selection approaches, including Boruta, LASSO, and PCA, and their variations across 5 ML models to predict cancer information–seeking behavior. Boruta and LASSO selected 43 and 42 variables, respectively, of which 27 were common to both methods, whereas PCA produced 20 uncorrelated PCs. Based on AUC, the RF model emerged as the best-performing algorithm, yielding comparable AUC values when using Boruta-selected features, LASSO-selected features, or no feature selection at all. In addition, a stepwise LR using the 27 variables identified by both Boruta and LASSO confirmed that 19 variables were significantly associated with cancer information seeking (<italic>P</italic>&lt;.05). The top predictors included having a cancer diagnosis, prior awareness of genetic testing, higher household income, a family history of cancer, being offered access to an online medical record, knowledge of HPV, worry about developing cancer, watching health-related videos on social media, and interest in cancer screening.</p>
      </sec>
      <sec>
        <title>Comparison to Prior Work</title>
        <p>Feature selection is a critical step in ML to reduce the dimensionality while retaining the most relevant features without excessive information loss [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>]. Previous studies have used several feature-selection methods, including LASSO, Boruta, and RF [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref35">35</xref>]. In this study, we applied Boruta and LASSO methods and selected 27 overlapping variables (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and <xref rid="figure3" ref-type="fig">Figure 3</xref>). The Boruta algorithm is a feature-selection method based on RF and considers important variables, but does not account for collinearity while selecting variables, whereas LASSO regression is useful when multicollinearity exists in the model [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. The LASSO can be used as a variable selection method because numerous <italic>β</italic> coefficients that are not strongly associated with the outcome are decreased to 0, which is equivalent to removing those variables from the model. However, the disadvantage of LASSO is that it relies on a more restrictive set of assumptions than RF. For example, the LASSO is a linear model, so any predictor whose effect on the outcome is not linear in the parameters under estimation is at risk of being eliminated. Most variables selected by LASSO were also selected by Boruta, which independently confirmed the association between those variables and cancer information–seeking behavior. Previous studies have revealed that combining these methods can lead to more robust feature selection, potentially improving model performance and interpretability [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>]. 
In addition, PCA has been used to reduce the dimensions, but it is not a feature-selection method because all variables remain in each PC [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]; however, PCA can help identify the most important features by examining their contributions (loadings) to the PCs. This study initially conducted data mining using PCA and selected important uncorrelated PCs with eigenvalues &gt;1.0, and for ML analysis, the PC scores were used as predictors, whereas all variables remained in each PC. Notably, based on each of the uncorrelated PCs, we performed feature selection by choosing 1 variable with the highest loading/correlation coefficient with the PC for ML analysis. In this study, PCA identified 20 uncorrelated PCs, while 20 features were selected from these PCs for the development of ML models. Boruta, LASSO, and the use of all 73 features did not show significant differences. However, these methods showed higher AUC than methods using PCA scores and PCA-based selection of the highest loading variables. The combined method of Boruta-LASSO did not yield a statistically significant improvement in model performance compared with LASSO alone. Moreover, the Boruta-LASSO method demonstrated lower discrimination than models using all 73 features or Boruta alone, but achieved higher AUC than models based on PCA-derived features or PCA-based selection of variables with the highest loadings. In addition, models using PCA scores directly showed better performance than models constructed using a reduced set of 20 variables selected based on PCA loadings.</p>
        <p>ML methods can accommodate a large number of predictors and capture complex relationships among variables, making them well-suited for predicting health information–seeking behaviors. For example, 3 ML algorithms (LR, SVM, and RF) were applied in predicting the information-seeking behavior of clinicians using an electronic medical record system [<xref ref-type="bibr" rid="ref61">61</xref>]. Furthermore, LASSO was used to predict health information–seeking behaviors [<xref ref-type="bibr" rid="ref62">62</xref>], and elastic net and LASSO models were used in predicting internet health seeking [<xref ref-type="bibr" rid="ref63">63</xref>]. Another study used 4 algorithms (ie, RF, SVM, Bayes generalized linear model, and gradient boosting) and an ensemble of the individual methods to identify search terms and patterns that correlate with changes in obesity and overweight prevalence across Africa [<xref ref-type="bibr" rid="ref64">64</xref>]. However, limited research has applied ML approaches to systematically identify key factors associated with cancer information–seeking behaviors. This study compared 5 ML tools using 10-fold cross-validation and tested multiple parameters for each algorithm using a grid search for optimal performance. Using 42 LASSO-selected variables, RF achieved the highest AUC (0.781) and second-highest accuracy (0.714). Using the 27 common variables identified by both Boruta and LASSO, the RF model achieved the highest predictive accuracy (0.711), the same as using SVM with linear kernel, closely followed by SVM with RBF kernel (0.708) and LR (0.708). Previous studies have shown that RF is one of the best ML tools. 
For example, 1 previous study evaluated 10 ML classifiers, including LR, linear discriminant analysis, naive Bayes, KNN, SVM with RBF kernel, DT, RF, XGBoost, AdaBoost, and artificial neural network (ANN), and found that the RF model and the SVM model showed the best performance [<xref ref-type="bibr" rid="ref65">65</xref>]. Another study developed 7 ML models (LR, KNN, SVM, DT, RF, XGBoost, and ANN) and found that RF and ANN models with the same AUC outperformed other models [<xref ref-type="bibr" rid="ref66">66</xref>]. Another study compared LR, KNN, gradient boosting, XGBoost, RF, multilayer perceptron, and SVM for diagnosing breast cancer and found that RF achieved the maximum accuracy of 90.68% [<xref ref-type="bibr" rid="ref67">67</xref>]. However, 1 recent study compared 6 ML algorithms (XGBoost, LR, SVM, RF, KNN, and DT) in developing prognostic models for patients with alpha-fetoprotein–positive hepatocellular carcinoma, and the XGBoost model performed the best, and RF was the second best [<xref ref-type="bibr" rid="ref68">68</xref>]. Another recent study compared 5 ML models (SVM, XGBoost, Gaussian naïve Bayes, adaptive boosting, and RF) and found that XGBoost and RF achieved superior predictive performance, as evidenced by higher AUCs [<xref ref-type="bibr" rid="ref69">69</xref>], whereas another study did not find differences among LR, SVM, and RF [<xref ref-type="bibr" rid="ref70">70</xref>]. As can be seen, there are studies in the literature on the use of ML algorithms in the diagnosis of different types of cancers, as well as other diseases and conditions. The comparisons of ML tools may show heterogeneity.</p>
        <p>Furthermore, although LASSO was our primary feature-selection method due to its advantages in handling multicollinearity, high-dimensional data, and overfitting, we used stepwise regression as a secondary, confirmatory analysis for several reasons. First, LASSO selects variables by shrinking coefficients through penalization, but it does not provide traditional inferential statistics such as SEs, <italic>P</italic> values, or likelihood-based model comparison, which are still expected in many epidemiologic and clinical research settings. Stepwise regression allowed us to evaluate whether LASSO-identified predictors remained significant under a conventional regression framework. Second, stepwise selection served as a sensitivity analysis, enabling assessment of the robustness and stability of the LASSO-identified variable set. Recent methodological papers recommend combining penalized regression with stepwise or likelihood-ratio–based checks when the goal includes both prediction and interpretation [<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref72">72</xref>]. Accordingly, stepwise regression was used not as the primary modeling strategy, but as a supplementary sensitivity analysis to evaluate the robustness and interpretability of the variables selected by LASSO.</p>
        <p>Patients with cancer often seek information regarding their diagnosis, treatment options, treatment costs, potential side effects, and the implications for daily life and survival. While physicians remain a primary source of such information, patients frequently turn to additional resources such as the internet and books to supplement their understanding [<xref ref-type="bibr" rid="ref12">12</xref>]. A previous study has identified that various factors influence cancer information–seeking behaviors, including gender, education levels, income, and cancer type [<xref ref-type="bibr" rid="ref3">3</xref>]. In this study, ML techniques were used to identify key predictors of cancer information–seeking behavior. Furthermore, 19 out of 27 selected variables from LASSO and Boruta were confirmed to be significantly associated with cancer information seeking by a stepwise LR model (<xref ref-type="table" rid="table4">Table 4</xref>). Consistent with prior research, variables including higher educational attainment, race/ethnicity, personal cancer history, family history of cancer, cancer-related beliefs, knowledge of genetic testing, and awareness of HPV were included in the analysis [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Furthermore, this study expanded the existing literature by identifying several additional predictors of cancer information–seeking behavior. These included beliefs that certain nutritional factors (such as red meat and alcohol consumption) and climate change increase cancer risk, engagement in social media activities, having access to online medical records, more frequent provider visits, and interest in cancer screening.</p>
        <p>Social media has become a main platform and important resource for people to obtain and exchange health-related information and advice [<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref74">74</xref>], and has become a new channel for promoting cancer prevention [<xref ref-type="bibr" rid="ref75">75</xref>]. For example, 1 study found that young adults with cancer used social media to connect with cancer peers for support [<xref ref-type="bibr" rid="ref76">76</xref>]. Another study showed that social media could enable the seeking and sharing of breast cancer–related information, and enhance patient education, communication, engagement, and empowerment [<xref ref-type="bibr" rid="ref77">77</xref>]. Social media platforms may increase access to health information and decision aids [<xref ref-type="bibr" rid="ref78">78</xref>]. However, while social media can make health information more accessible, the use of social media for health information seeking can also create the risk of harm through exposure to misinformation. Because misinformation perceptions can affect attitudes and behaviors, a better understanding of the public’s perceptions of health misinformation on social media and their ability to detect it, as well as possible subgroup differences in such perceptions, is needed [<xref ref-type="bibr" rid="ref79">79</xref>]. However, misinformation and disinformation on social media have become widespread, which can lead to a lack of trust in health information sources and, in turn, lead to negative health outcomes [<xref ref-type="bibr" rid="ref80">80</xref>]. In this study, 3 variables related to social media use were selected by both Boruta and LASSO (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), including social_media_share_general_health (1-5, 5 = almost everyday), social_media_watch_video (1-5, 5 = almost everyday), and social_media_misleading (1-5, 5= a lot). 
Stepwise LR further confirmed these variables, including social_media_watch_video and social_media_misleading (<xref ref-type="table" rid="table4">Table 4</xref>). These findings highlight that people involved in social media activities have increased odds of seeking cancer information. Furthermore, a previous analysis of HINTS 6 data showed that most social media users perceive some (46%) or a lot (36%) of false or misleading health information on social media [<xref ref-type="bibr" rid="ref80">80</xref>]. This study added that social media users who perceived more false or misleading health information on social media had higher odds of seeking cancer information.</p>
        <p>This study further added that beliefs about alcohol use causing cancer were associated with increased odds of cancer information seeking. Alcohol consumption increases the risk of several types of cancer, including liver, esophageal, colorectal, and breast cancer; however, public awareness of the association between alcohol use and cancer remains low and varies by type of alcoholic beverage [<xref ref-type="bibr" rid="ref81">81</xref>-<xref ref-type="bibr" rid="ref83">83</xref>]. For example, using the HINTS (2020) data, 1 study found that awareness of the alcohol-cancer link was highest for liquor (31.2%), followed by beer (24.9%) and wine (20.3%). More US adults believed wine (10.3%) decreased cancer risk, compared with beer (2.2%) and liquor (1.7%). Most US adults (&gt;50%) reported not knowing how these beverages affected cancer risk [<xref ref-type="bibr" rid="ref82">82</xref>]. Another study using the HINTS (2020) data found that 34% of those reporting current alcohol consumption believed that drinking wine decreases or has no effect on cancer risk, compared with 20.8% of those reporting no alcohol consumption [<xref ref-type="bibr" rid="ref84">84</xref>]. A recent study using several HINTS cycle datasets did not find significant differences in diet-related cancer risk awareness and behaviors between cancer survivors and those without a history of cancer [<xref ref-type="bibr" rid="ref85">85</xref>]. Among the European Union general population, awareness of the link between alcohol and breast cancer ranged between 10% and 20%, head and neck cancer (15%-25%), colorectal and esophagus cancer (15%-45%), and liver cancer (40%). Awareness was higher among young people and specialized health professions and lower among women (the latter specifically for breast cancer) [<xref ref-type="bibr" rid="ref83">83</xref>].</p>
      </sec>
      <sec>
        <title>Practical Implications</title>
        <p>This study identified a set of variables associated with cancer information seeking. We address 2 implications. First, social media users had higher odds of seeking cancer information. Previous studies found that web-based infotainment videos are an effective approach in increasing public understanding about science and health care among web-based health information seekers and are a useful and effective approach in relaying complex health information, motivating interested viewers to seek additional health information, and driving public audiences to credible and reliable sources of information [<xref ref-type="bibr" rid="ref86">86</xref>]. Furthermore, social media plays a significant role in how people seek and share information about cancer, both for themselves and for others. While social media can be a valuable tool for connecting with support networks and accessing information, it also presents challenges related to misinformation and the potential for information overload. Despite many perceived benefits of social media use among oncology stakeholders, misinformation poses a critical threat to the value of social media for seeking and sharing cancer-related information [<xref ref-type="bibr" rid="ref87">87</xref>]. It has been suggested that it is necessary for all key stakeholders—including patients and the public, health care providers, researchers, technology companies, and governmental organizations—to proactively address the problem of online health misinformation [<xref ref-type="bibr" rid="ref87">87</xref>].</p>
        <p>Furthermore, awareness and beliefs about alcohol and red meat were significantly associated with cancer information seeking. It has been suggested that knowledge about diet-related cancer risks is essential for behavior change; therefore, increasing public knowledge and risk beliefs about the link between alcohol and cancer, particularly among those who consume alcohol, may contribute to declines in the burden of alcohol-related disease in the United States [<xref ref-type="bibr" rid="ref84">84</xref>]. Further research is warranted to understand these factors better and to develop effective strategies to improve dietary behaviors among cancer survivors [<xref ref-type="bibr" rid="ref85">85</xref>].</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>This study has several notable strengths. First, this study used the most recent HINTS 6 data to examine the prevalence of cancer information seeking. The HINTS data provide unparalleled insights into health information–seeking behaviors, social media use, and beliefs about alcohol and cancer, among other topics. Second, we performed feature selection using 2 widely used methods, LASSO and Boruta, to identify common variables across both methods. Third, we inferred PC scores (factor scores) and then used weighted LR analyses to estimate the associations of potential factors and PC scores with cancer information seeking. Fourth, we compared 5 ML algorithms and found that the RF model demonstrated outstanding classification performance in predicting cancer information seeking. Moreover, we used stepwise LR analysis to confirm the results from ML techniques.</p>
        <p>Despite these strengths, our analysis has some limitations. First, because the HINTS data are cross-sectional, we could only identify correlations rather than causal relationships. Future research could address this limitation by applying a rigorous quasi-experimental method to longitudinal datasets. Second, since information from participants was self-reported, our study may be subject to recall bias as well as social desirability bias. Third, a notable limitation is the relatively low response rate of the HINTS 6. Low response rates raise concerns about nonresponse bias, particularly if nonrespondents differ meaningfully from respondents on key variables such as alcohol use, health information seeking, or cancer awareness. Although HINTS incorporates sampling weights, replicate weights, and nonresponse adjustments to enhance population representativeness, these statistical corrections cannot fully eliminate bias arising from selective participation. Therefore, caution is warranted when interpreting the generalizability of our findings to all US adults. Fourth, the data were collected in 2022, and the COVID-19 pandemic may have influenced both data collection and results. In addition, this study used only the binary outcome (seeker vs nonseeker). For seekers, the data include 4 additional items asking how much respondents agree or disagree with the following statements: it took a lot of effort to get the information you needed, you felt frustrated during your search for the information, you were concerned about the quality of the information, and the information you found was hard to understand. We did not include these subquestions. Furthermore, because the outcome captured lifetime (“ever”) cancer information seeking, whereas several predictors reflected respondents’ current status in 2022, temporal misalignment may exist. 
Information seeking may have occurred prior to the measurement of current characteristics; therefore, associations should be interpreted as correlational rather than causal.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>This study provided the updated prevalence of cancer information seeking among US adults. Furthermore, we performed feature selection and compared 5 ML algorithms for classifying cancer information seeking, and identified that RF was the best performer with the highest AUC. Moreover, PCA proved useful for data mining to reduce the indicators in complex survey data and aid in feature selection. In addition, based on the stepwise regression model, 19 out of 27 selected variables were significantly associated with cancer information seeking. We identified a set of predictive variables for cancer information seeking, such as having cancer, having a family history of cancer, worrying about getting cancer, knowledge of genetic tests and HPV, being offered access to health records, higher income, often watching videos on social media, believing that alcohol consumption increases cancer risk, and frequency of visiting providers. Our findings may benefit researchers, policymakers, and health care providers by increasing public awareness and supporting targeted education on cancer information seeking.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Feature selection based on LASSO and Boruta. LASSO: least absolute shrinkage and selection operator.</p>
        <media xlink:href="medinform_v14i1e75862_app1.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 16 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Eigenvalues based on PCA. PCA: principal component analysis.</p>
        <media xlink:href="medinform_v14i1e75862_app2.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Rotated component/loading of variables for 20 factors.</p>
        <media xlink:href="medinform_v14i1e75862_app3.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 21 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Machine learning and comparison of performance.</p>
        <media xlink:href="medinform_v14i1e75862_app4.docx" xlink:title="DOCX File , 25 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Additional figures.</p>
        <media xlink:href="medinform_v14i1e75862_app5.docx" xlink:title="DOCX File , 177 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ANN</term>
          <def>
            <p>artificial neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">DT</term>
          <def>
            <p>decision tree</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">HINTS 6</term>
          <def>
            <p>2022 Health Information National Trends Survey</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HINTS</term>
          <def>
            <p>Health Information National Trends Survey</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">HPV</term>
          <def>
            <p>human papillomavirus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">KNN</term>
          <def>
            <p>k-nearest neighbor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">LASSO</term>
          <def>
            <p>least absolute shrinkage and selection operator</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NCI</term>
          <def>
            <p>National Cancer Institute</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">OR</term>
          <def>
            <p>odds ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PC</term>
          <def>
            <p>principal component</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">PCA</term>
          <def>
            <p>principal component analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">RBF</term>
          <def>
            <p>radial basis function</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">XGBoost</term>
          <def>
            <p>extreme gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank the National Cancer Institute for providing the data from the 2022 Health Information National Trends Survey.</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>The data that support the findings of this study are openly available at the National Cancer Institute [<xref ref-type="bibr" rid="ref88">88</xref>].</p>
    </notes>
    <notes>
      <title>Funding</title>
      <p>No funding source is given for this paper.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>YL contributed to writing—original draft, software, methodology, formal analysis, and data curation. KW contributed to writing—original draft, review and editing, software, methodology, formal analysis, and conceptualization.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weaver</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Mays</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Weaver</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Hopkins</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Eroğlu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bernhardt</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Health information–seeking behaviors, health indicators, and health risks</article-title>
          <source>Am J Public Health</source>
          <year>2010</year>
          <volume>100</volume>
          <issue>8</issue>
          <fpage>1520</fpage>
          <lpage>1525</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2009.180521</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Cancer statistics</article-title>
          <source>National Cancer Institute</source>
          <year>2024</year>
          <access-date>2025-03-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cancer.gov/about-cancer/understanding/statistics">https://www.cancer.gov/about-cancer/understanding/statistics</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roach</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Lykins</surname>
              <given-names>ELB</given-names>
            </name>
            <name name-style="western">
              <surname>Gochett</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Brechting</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Graue</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Andrykowski</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Differences in cancer information-seeking behavior, preferences, and awareness between cancer survivors and healthy controls: a national, population-based survey</article-title>
          <source>J Cancer Educ</source>
          <year>2009</year>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>73</fpage>
          <lpage>79</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19259869"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/08858190802664784</pub-id>
          <pub-id pub-id-type="medline">19259869</pub-id>
          <pub-id pub-id-type="pii">909206173</pub-id>
          <pub-id pub-id-type="pmcid">PMC3315685</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vanderpool</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Mollica</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gutierrez</surname>
              <given-names>AI</given-names>
            </name>
            <name name-style="western">
              <surname>Maynard</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Cancer information-seeking in an age of COVID-19: findings from the National Cancer Institute's Cancer Information Service</article-title>
          <source>Health Commun</source>
          <year>2021</year>
          <volume>36</volume>
          <issue>1</issue>
          <fpage>89</fpage>
          <lpage>97</lpage>
          <pub-id pub-id-type="doi">10.1080/10410236.2020.1847449</pub-id>
          <pub-id pub-id-type="medline">33225770</pub-id>
          <pub-id pub-id-type="pmcid">PMC12645444</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hornik</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Romantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>DeMichele</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fishbein</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hull</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nagler</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Niederdeppe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ramírez</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Smith-McLallen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Cancer information scanning and seeking in the general population</article-title>
          <source>J Health Commun</source>
          <year>2010</year>
          <volume>15</volume>
          <issue>7</issue>
          <fpage>734</fpage>
          <lpage>753</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21104503"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/10810730.2010.514029</pub-id>
          <pub-id pub-id-type="medline">21104503</pub-id>
          <pub-id pub-id-type="pii">929760220</pub-id>
          <pub-id pub-id-type="pmcid">PMC3661288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hornik</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Cancer information scanning and seeking behavior is associated with knowledge, lifestyle choices, and screening</article-title>
          <source>J Health Commun</source>
          <year>2006</year>
          <volume>11 Suppl 1</volume>
          <fpage>157</fpage>
          <lpage>172</lpage>
          <pub-id pub-id-type="doi">10.1080/10810730600637475</pub-id>
          <pub-id pub-id-type="medline">16641081</pub-id>
          <pub-id pub-id-type="pii">LN63N077JK104326</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Niederdeppe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hornik</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Frosch</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Romantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Barg</surname>
              <given-names>FK</given-names>
            </name>
            <name name-style="western">
              <surname>Weiner</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Examining the dimensions of cancer-related information seeking and scanning behavior</article-title>
          <source>Health Commun</source>
          <year>2007</year>
          <volume>22</volume>
          <issue>2</issue>
          <fpage>153</fpage>
          <lpage>167</lpage>
          <pub-id pub-id-type="doi">10.1080/10410230701454189</pub-id>
          <pub-id pub-id-type="medline">17668995</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Terrin</surname>
              <given-names>NC</given-names>
            </name>
            <name name-style="western">
              <surname>Kreps</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Menon</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>McCance</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Parsons</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Mooney</surname>
              <given-names>KH</given-names>
            </name>
          </person-group>
          <article-title>Cancer survivors information seeking behaviors: a comparison of survivors who do and do not seek information about cancer</article-title>
          <source>Patient Educ Couns</source>
          <year>2007</year>
          <volume>65</volume>
          <issue>3</issue>
          <fpage>342</fpage>
          <lpage>350</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/17029864"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.pec.2006.08.015</pub-id>
          <pub-id pub-id-type="medline">17029864</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(06)00295-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5693234</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rutten</surname>
              <given-names>LJF</given-names>
            </name>
            <name name-style="western">
              <surname>Squiers</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Cancer-related information seeking: hints from the 2003 Health Information National Trends Survey (HINTS)</article-title>
          <source>J Health Commun</source>
          <year>2006</year>
          <volume>11 Suppl 1</volume>
          <fpage>147</fpage>
          <lpage>156</lpage>
          <pub-id pub-id-type="doi">10.1080/10810730600637574</pub-id>
          <pub-id pub-id-type="medline">16641080</pub-id>
          <pub-id pub-id-type="pii">HJ111GR5193R2111</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sacca</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Maroun</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Khoury</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Predictors of high trust and the role of confidence levels in seeking cancer-related information</article-title>
          <source>Inform Health Soc Care</source>
          <year>2022</year>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>53</fpage>
          <lpage>61</lpage>
          <pub-id pub-id-type="doi">10.1080/17538157.2021.1925676</pub-id>
          <pub-id pub-id-type="medline">34014145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mahmood</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kedia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ogunsanmi</surname>
              <given-names>DO</given-names>
            </name>
            <name name-style="western">
              <surname>Kabir</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Entwistle</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Patient-centered communication and cancer information-seeking experiences among cancer survivors: a population-based study in the United States</article-title>
          <source>Patient Educ Couns</source>
          <year>2025</year>
          <volume>135</volume>
          <fpage>108710</fpage>
          <pub-id pub-id-type="doi">10.1016/j.pec.2025.108710</pub-id>
          <pub-id pub-id-type="medline">40010060</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(25)00077-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nagler</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Romantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>DeMichele</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hornik</surname>
              <given-names>RC</given-names>
            </name>
          </person-group>
          <article-title>Differences in information seeking among breast, prostate, and colorectal cancer patients: results from a population-based survey</article-title>
          <source>Patient Educ Couns</source>
          <year>2010</year>
          <volume>81 Suppl</volume>
          <fpage>S54</fpage>
          <lpage>S62</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20934297"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.pec.2010.09.010</pub-id>
          <pub-id pub-id-type="medline">20934297</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(10)00558-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC2993788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Perceived needs versus predisposing/enabling characteristics in relation to internet cancer information seeking among the US and Chinese public: comparative survey research</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e24733</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e24733/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24733</pub-id>
          <pub-id pub-id-type="medline">33427668</pub-id>
          <pub-id pub-id-type="pii">v23i1e24733</pub-id>
          <pub-id pub-id-type="pmcid">PMC7834927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Association of cancer information seeking behavior with cigarette smoking and e-cigarette use among U.S. adults by education attainment level: a multi-year cross-sectional analysis from a nationally representative sample in 2017-2020</article-title>
          <source>Prev Med</source>
          <year>2023</year>
          <volume>172</volume>
          <fpage>107550</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ypmed.2023.107550</pub-id>
          <pub-id pub-id-type="medline">37210044</pub-id>
          <pub-id pub-id-type="pii">S0091-7435(23)00130-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaphingst</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Lachance</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Condit</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Beliefs about heritability of cancer and health information seeking and preventive behaviors</article-title>
          <source>J Cancer Educ</source>
          <year>2009</year>
          <volume>24</volume>
          <issue>4</issue>
          <fpage>351</fpage>
          <lpage>356</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19838898"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/08858190902876304</pub-id>
          <pub-id pub-id-type="medline">19838898</pub-id>
          <pub-id pub-id-type="pii">915933084</pub-id>
          <pub-id pub-id-type="pmcid">PMC2769023</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>The role of risk, efficacy, and anxiety in smokers' cancer information seeking</article-title>
          <source>Health Commun</source>
          <year>2009</year>
          <volume>24</volume>
          <issue>3</issue>
          <fpage>259</fpage>
          <lpage>269</lpage>
          <pub-id pub-id-type="doi">10.1080/10410230902805932</pub-id>
          <pub-id pub-id-type="medline">19415558</pub-id>
          <pub-id pub-id-type="pii">910939867</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arora</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Rimer</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Viswanath</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Clayman</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Croyle</surname>
              <given-names>RT</given-names>
            </name>
          </person-group>
          <article-title>Frustrated and confused: the American public rates its cancer-related information-seeking experiences</article-title>
          <source>J Gen Intern Med</source>
          <year>2008</year>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>223</fpage>
          <lpage>228</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/17922166"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-007-0406-y</pub-id>
          <pub-id pub-id-type="medline">17922166</pub-id>
          <pub-id pub-id-type="pmcid">PMC2359461</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agurs-Collins</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ottenbacher</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Waters</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connell</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>JG</given-names>
            </name>
          </person-group>
          <article-title>Public awareness of direct-to-consumer genetic tests: findings from the 2013 U.S. Health Information National Trends Survey</article-title>
          <source>J Cancer Educ</source>
          <year>2015</year>
          <volume>30</volume>
          <issue>4</issue>
          <fpage>799</fpage>
          <lpage>807</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25600375"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13187-014-0784-x</pub-id>
          <pub-id pub-id-type="medline">25600375</pub-id>
          <pub-id pub-id-type="pii">10.1007/s13187-014-0784-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC4508242</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>YC</given-names>
            </name>
          </person-group>
          <article-title>Cultural differences in cancer information acquisition: cancer risk perceptions, fatalistic beliefs, and worry as predictors of cancer information seeking and avoidance in the U.S. and China</article-title>
          <source>Health Commun</source>
          <year>2022</year>
          <volume>37</volume>
          <issue>11</issue>
          <fpage>1442</fpage>
          <lpage>1451</lpage>
          <pub-id pub-id-type="doi">10.1080/10410236.2021.1901422</pub-id>
          <pub-id pub-id-type="medline">33752516</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Stee</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Online cancer information seeking: applying and extending the comprehensive model of information seeking</article-title>
          <source>Health Commun</source>
          <year>2018</year>
          <volume>33</volume>
          <issue>12</issue>
          <fpage>1583</fpage>
          <lpage>1592</lpage>
          <pub-id pub-id-type="doi">10.1080/10410236.2017.1384350</pub-id>
          <pub-id pub-id-type="medline">29083231</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kye</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Yun</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Factors related to cancer information scanning and seeking behavior among high school students in Korea</article-title>
          <source>Asian Pac J Cancer Prev</source>
          <year>2012</year>
          <volume>13</volume>
          <issue>4</issue>
          <fpage>1439</fpage>
          <lpage>1445</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://journal.waocp.org/?sid=Entrez:PubMed&amp;id=pmid:22799345&amp;key=2012.13.4.1439"/>
          </comment>
          <pub-id pub-id-type="doi">10.7314/apjcp.2012.13.4.1439</pub-id>
          <pub-id pub-id-type="medline">22799345</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nelissen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Beullens</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lemal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van den Bulck</surname>
              <given-names>Jan</given-names>
            </name>
          </person-group>
          <article-title>Fear of cancer is associated with cancer information seeking, scanning and avoiding: a cross-sectional study among cancer diagnosed and non-diagnosed individuals</article-title>
          <source>Health Info Libr J</source>
          <year>2015</year>
          <volume>32</volume>
          <issue>2</issue>
          <fpage>107</fpage>
          <lpage>119</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.1111/hir.12100"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/hir.12100</pub-id>
          <pub-id pub-id-type="medline">25809822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hurtado</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Siefkas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Attwood</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Iqbal</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Machine learning applications and advancements in alcohol use disorder: a systematic review</article-title>
          <source>medRxiv</source>
          <year>2022</year>
          <fpage>12</fpage>
          <pub-id pub-id-type="doi">10.1101/2022.06.06.22276057</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kufel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bargieł-Łączek</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kocot</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Koźlik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bartnikowska</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Janik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Czogalik</surname>
              <given-names>Ł</given-names>
            </name>
            <name name-style="western">
              <surname>Dudek</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Magiera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paszkiewicz</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nawrat</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cebula</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gruszczyńska</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>What is machine learning, artificial neural networks and deep learning?—examples of practical applications in medicine</article-title>
          <source>Diagnostics (Basel)</source>
          <year>2023</year>
          <volume>13</volume>
          <issue>15</issue>
          <fpage>2582</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=diagnostics13152582"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/diagnostics13152582</pub-id>
          <pub-id pub-id-type="medline">37568945</pub-id>
          <pub-id pub-id-type="pii">diagnostics13152582</pub-id>
          <pub-id pub-id-type="pmcid">PMC10417718</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Theodosiou</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Read</surname>
              <given-names>RC</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence, machine learning and deep learning: potential resources for the infection clinician</article-title>
          <source>J Infect</source>
          <year>2023</year>
          <volume>87</volume>
          <issue>4</issue>
          <fpage>287</fpage>
          <lpage>294</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0163-4453(23)00379-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jinf.2023.07.006</pub-id>
          <pub-id pub-id-type="medline">37468046</pub-id>
          <pub-id pub-id-type="pii">S0163-4453(23)00379-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kumari</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A structured analysis to study the role of machine learning and deep learning in the healthcare sector with big data analytics</article-title>
          <source>Arch Comput Methods Eng</source>
          <year>2023</year>
          <volume>30</volume>
          <issue>6</issue>
          <fpage>3673</fpage>
          <lpage>3701</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37359744"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11831-023-09915-y</pub-id>
          <pub-id pub-id-type="medline">37359744</pub-id>
          <pub-id pub-id-type="pii">9915</pub-id>
          <pub-id pub-id-type="pmcid">PMC10064607</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shastry</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Vijayakumar</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Deep learning techniques for the effective prediction of Alzheimer's disease: a comprehensive review</article-title>
          <source>Healthcare (Basel)</source>
          <year>2022</year>
          <volume>10</volume>
          <issue>10</issue>
          <fpage>1842</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=healthcare10101842"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/healthcare10101842</pub-id>
          <pub-id pub-id-type="medline">36292289</pub-id>
          <pub-id pub-id-type="pii">healthcare10101842</pub-id>
          <pub-id pub-id-type="pmcid">PMC9601959</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kopsaftopoulos</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Flight state identification of a self-sensing wing via an improved feature selection method and machine learning approaches</article-title>
          <source>Sensors (Basel)</source>
          <year>2018</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>1379</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=s18051379"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/s18051379</pub-id>
          <pub-id pub-id-type="medline">29710832</pub-id>
          <pub-id pub-id-type="pii">s18051379</pub-id>
          <pub-id pub-id-type="pmcid">PMC5982412</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Awan</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Bennamoun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sohel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sanfilippo</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dwivedi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Feature selection and transformation by machine learning reduce variable numbers and improve prediction for heart failure readmission or death</article-title>
          <source>PLoS One</source>
          <year>2019</year>
          <volume>14</volume>
          <issue>6</issue>
          <fpage>e0218760</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0218760"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0218760</pub-id>
          <pub-id pub-id-type="medline">31242238</pub-id>
          <pub-id pub-id-type="pii">PONE-D-19-08930</pub-id>
          <pub-id pub-id-type="pmcid">PMC6594617</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cömert</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Şengür</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Budak</surname>
              <given-names>Ü</given-names>
            </name>
            <name name-style="western">
              <surname>Kocamaz</surname>
              <given-names>AF</given-names>
            </name>
          </person-group>
          <article-title>Prediction of intrapartum fetal hypoxia considering feature selection algorithms and machine learning models</article-title>
          <source>Health Inf Sci Syst</source>
          <year>2019</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>17</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31435480"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13755-019-0079-z</pub-id>
          <pub-id pub-id-type="medline">31435480</pub-id>
          <pub-id pub-id-type="pii">79</pub-id>
          <pub-id pub-id-type="pmcid">PMC6702252</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raihan-Al-Masud</surname>
              <given-names>Md</given-names>
            </name>
            <name name-style="western">
              <surname>Mondal</surname>
              <given-names>MRH</given-names>
            </name>
          </person-group>
          <article-title>Data-driven diagnosis of spinal abnormalities using feature selection and machine learning algorithms</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>e0228422</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0228422"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0228422</pub-id>
          <pub-id pub-id-type="medline">32027680</pub-id>
          <pub-id pub-id-type="pii">PONE-D-19-24261</pub-id>
          <pub-id pub-id-type="pmcid">PMC7004343</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Atuegwu</surname>
              <given-names>NC</given-names>
            </name>
            <name name-style="western">
              <surname>Litt</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Krishnan-Sarin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Laubenbacher</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Mortensen</surname>
              <given-names>EM</given-names>
            </name>
          </person-group>
          <article-title>E-cigarette use in young adult never cigarette smokers with disabilities: results from the behavioral risk factor surveillance system survey</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2021</year>
          <volume>18</volume>
          <issue>10</issue>
          <fpage>5476</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph18105476"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph18105476</pub-id>
          <pub-id pub-id-type="medline">34065407</pub-id>
          <pub-id pub-id-type="pii">ijerph18105476</pub-id>
          <pub-id pub-id-type="pmcid">PMC8160823</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Atuegwu</surname>
              <given-names>NC</given-names>
            </name>
            <name name-style="western">
              <surname>Oncken</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Laubenbacher</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Mortensen</surname>
              <given-names>EM</given-names>
            </name>
          </person-group>
          <article-title>Factors associated with e-cigarette use in U.S. young adult never smokers of conventional cigarettes: a machine learning approach</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2020</year>
          <volume>17</volume>
          <issue>19</issue>
          <fpage>7271</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph17197271"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph17197271</pub-id>
          <pub-id pub-id-type="medline">33027932</pub-id>
          <pub-id pub-id-type="pii">ijerph17197271</pub-id>
          <pub-id pub-id-type="pmcid">PMC7579019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>HT</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Haddad</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Machine learning-based nicotine addiction prediction models for youth e-cigarette and waterpipe (hookah) users</article-title>
          <source>J Clin Med</source>
          <year>2021</year>
          <volume>10</volume>
          <issue>5</issue>
          <fpage>972</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jcm10050972"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jcm10050972</pub-id>
          <pub-id pub-id-type="medline">33801175</pub-id>
          <pub-id pub-id-type="pii">jcm10050972</pub-id>
          <pub-id pub-id-type="pmcid">PMC7957622</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chaiton</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Leventhal</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Unger</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Barrington-Trimis</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>A machine learning approach to identify predictors of frequent vaping and vulnerable Californian youth subgroups</article-title>
          <source>Nicotine Tob Res</source>
          <year>2022</year>
          <volume>24</volume>
          <issue>7</issue>
          <fpage>1028</fpage>
          <lpage>1036</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34888698"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ntr/ntab257</pub-id>
          <pub-id pub-id-type="medline">34888698</pub-id>
          <pub-id pub-id-type="pii">6458468</pub-id>
          <pub-id pub-id-type="pmcid">PMC9199938</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castillo-Barnes</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ramírez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salas-Gonzalez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez-Murcia</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Illan</surname>
              <given-names>IA</given-names>
            </name>
            <name name-style="western">
              <surname>Segovia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ortiz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cruchaga</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Farlow</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Graff-Radford</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Schofield</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Masters</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Salloway</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jucker</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mori</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gorriz</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Autosomal dominantly inherited Alzheimer disease: analysis of genetic subgroups by machine learning</article-title>
          <source>Information Fusion</source>
          <year>2020</year>
          <volume>58</volume>
          <fpage>153</fpage>
          <lpage>167</lpage>
          <pub-id pub-id-type="doi">10.1016/j.inffus.2020.01.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Texture analysis of fat-suppressed T2-weighted magnetic resonance imaging and use of machine learning to discriminate nasal and paranasal sinus small round malignant cell tumors</article-title>
          <source>Front Oncol</source>
          <year>2021</year>
          <volume>11</volume>
          <fpage>701289</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34966664"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fonc.2021.701289</pub-id>
          <pub-id pub-id-type="medline">34966664</pub-id>
          <pub-id pub-id-type="pmcid">PMC8710453</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grazioli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Butts</surname>
              <given-names>CT</given-names>
            </name>
          </person-group>
          <article-title>Comparative exploratory analysis of intrinsically disordered protein dynamics using machine learning and network analytic methods</article-title>
          <source>Front Mol Biosci</source>
          <year>2019</year>
          <volume>6</volume>
          <fpage>42</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31245383"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fmolb.2019.00042</pub-id>
          <pub-id pub-id-type="medline">31245383</pub-id>
          <pub-id pub-id-type="pmcid">PMC6581705</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rex</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Colonoscopy remains an important option for primary screening for colorectal cancer</article-title>
          <source>Dig Dis Sci</source>
          <year>2025</year>
          <volume>70</volume>
          <issue>5</issue>
          <fpage>1595</fpage>
          <lpage>1605</lpage>
          <pub-id pub-id-type="doi">10.1007/s10620-024-08760-8</pub-id>
          <pub-id pub-id-type="medline">39666212</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10620-024-08760-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shia</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Identifying the predictors of severe psychological distress by auto-machine learning methods</article-title>
          <source>Inform Med Unlocked</source>
          <year>2023</year>
          <volume>39</volume>
          <fpage>101258</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2352-9148(23)00100-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.imu.2023.101258</pub-id>
          <pub-id pub-id-type="medline">37152204</pub-id>
          <pub-id pub-id-type="pii">S2352-9148(23)00100-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC10141788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shia</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Using the H2O automatic machine learning algorithms to identify predictors of web-based medical record nonuse among patients in a data-rich environment: mixed methods study</article-title>
          <source>JMIR Med Inform</source>
          <year>2023</year>
          <volume>11</volume>
          <fpage>e41576</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2023/1/e41576/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/41576</pub-id>
          <pub-id pub-id-type="medline">37335616</pub-id>
          <pub-id pub-id-type="pii">v11i1e41576</pub-id>
          <pub-id pub-id-type="pmcid">PMC10337515</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Machine learning-based prediction of binge drinking among adults in the United State: analysis of the 2022 Health Information National Trends Survey</article-title>
          <year>2024</year>
          <conf-name>Proceedings of the 2024 9th International Conference on Mathematics and Artificial Intelligence</conf-name>
          <conf-date>2024, May 10-12</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3670085.3670090</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Feature selection and machine learning approaches in prediction of current e-cigarette use among U.S. adults in 2022</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2024</year>
          <volume>21</volume>
          <issue>11</issue>
          <fpage>1474</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph21111474"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph21111474</pub-id>
          <pub-id pub-id-type="medline">39595741</pub-id>
          <pub-id pub-id-type="pii">ijerph21111474</pub-id>
          <pub-id pub-id-type="pmcid">PMC11594230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <article-title>Health Information National Trends Survey HINTS 6 methodology report</article-title>
          <source>Westat</source>
          <year>2023</year>
          <access-date>2025-08-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hints.cancer.gov/docs/methodologyreports/HINTS_6_MethodologyReport.pdf">https://hints.cancer.gov/docs/methodologyreports/HINTS_6_MethodologyReport.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jakobsen</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Gluud</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wetterslev</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Winkel</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>When and how should multiple imputation be used for handling missing data in randomised clinical trials – a practical guide with flowcharts</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2017</year>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>162</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-017-0442-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-017-0442-1</pub-id>
          <pub-id pub-id-type="medline">29207961</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-017-0442-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5717805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schafer</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Graham</surname>
              <given-names>JW</given-names>
            </name>
          </person-group>
          <article-title>Missing data: our view of the state of the art</article-title>
          <source>Psychol Methods</source>
          <year>2002</year>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>147</fpage>
          <lpage>177</lpage>
          <pub-id pub-id-type="medline">12090408</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chaiton</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A machine learning approach to predict e-cigarette use and dependence among Ontario youth</article-title>
          <source>Health Promot Chronic Dis Prev Can</source>
          <year>2022</year>
          <volume>42</volume>
          <issue>1</issue>
          <fpage>21</fpage>
          <lpage>28</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.24095/hpcdp.42.1.04"/>
          </comment>
          <pub-id pub-id-type="doi">10.24095/hpcdp.42.1.04</pub-id>
          <pub-id pub-id-type="medline">35044141</pub-id>
          <pub-id pub-id-type="pmcid">PMC9067014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Building predictive models in R using the caret package</article-title>
          <source>J Stat Softw</source>
          <year>2008</year>
          <volume>28</volume>
          <issue>5</issue>
          <fpage>1</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v028.i05</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kursa</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Rudnicki</surname>
              <given-names>WR</given-names>
            </name>
          </person-group>
          <article-title>Feature selection with the Boruta package</article-title>
          <source>J Stat Softw</source>
          <year>2010</year>
          <volume>36</volume>
          <issue>11</issue>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v036.i11</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Regularization paths for generalized linear models via coordinate descent</article-title>
          <source>J Stat Softw</source>
          <year>2010</year>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v033.i01</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Mach Learn</source>
          <year>1995</year>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>273</fpage>
          <lpage>297</lpage>
          <pub-id pub-id-type="doi">10.1007/BF00994018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bui</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A comparative study of logistic model tree, random forest, and classification and regression tree models for spatial prediction of landslide susceptibility</article-title>
          <source>Catena</source>
          <year>2017</year>
          <volume>151</volume>
          <fpage>147</fpage>
          <lpage>160</lpage>
          <pub-id pub-id-type="doi">10.1016/j.catena.2016.11.032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kesler</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Blayney</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Oakley-Girvan</surname>
              <given-names>IA</given-names>
            </name>
            <name name-style="western">
              <surname>Karuturi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Palesh</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Predicting long-term cognitive outcome following breast cancer with pre-treatment resting state fMRI and random forest machine learning</article-title>
          <source>Front Hum Neurosci</source>
          <year>2017</year>
          <volume>11</volume>
          <fpage>555</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29187817"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fnhum.2017.00555</pub-id>
          <pub-id pub-id-type="medline">29187817</pub-id>
          <pub-id pub-id-type="pmcid">PMC5694825</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Introduction to machine learning: k-nearest neighbors</article-title>
          <source>Ann Transl Med</source>
          <year>2016</year>
          <volume>4</volume>
          <issue>11</issue>
          <fpage>218</fpage>
          <lpage>218</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27386492"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/atm.2016.03.37</pub-id>
          <pub-id pub-id-type="medline">27386492</pub-id>
          <pub-id pub-id-type="pii">atm-04-11-218</pub-id>
          <pub-id pub-id-type="pmcid">PMC4916348</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: a scalable tree boosting system</article-title>
          <year>2016</year>
          <conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>2016, August 13-17</conf-date>
          <conf-loc>San Francisco, California, USA</conf-loc>
          <fpage>785</fpage>
          <lpage>794</lpage>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>DeLong</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>DeLong</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke-Pearson</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Comparing the areas under two or more correlated receiver operating characteristic curves: a nonparametric approach</article-title>
          <source>Biometrics</source>
          <year>1988</year>
          <volume>44</volume>
          <issue>3</issue>
          <fpage>837</fpage>
          <lpage>845</lpage>
          <pub-id pub-id-type="medline">3203132</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaneko</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Examining variable selection methods for the predictive performance of regression models and the proportion of selected variables and selected random variables</article-title>
          <source>Heliyon</source>
          <year>2021</year>
          <volume>7</volume>
          <issue>6</issue>
          <fpage>e07356</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2405-8440(21)01459-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.heliyon.2021.e07356</pub-id>
          <pub-id pub-id-type="medline">34195450</pub-id>
          <pub-id pub-id-type="pii">S2405-8440(21)01459-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8237311</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rosset</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Random lasso</article-title>
          <source>Ann Appl Stat</source>
          <year>2011</year>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>468</fpage>
          <lpage>485</lpage>
          <pub-id pub-id-type="doi">10.1214/10-aoas377</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Lasso regression and Boruta algorithm to explore the relationship between neutrophil percentage to albumin ratio and asthma: results from the NHANES 2001 to 2018</article-title>
          <source>Clin Exp Med</source>
          <year>2025</year>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>149</fpage>
          <pub-id pub-id-type="doi">10.1007/s10238-025-01701-3</pub-id>
          <pub-id pub-id-type="medline">40347409</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10238-025-01701-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC12065745</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Comparison of machine learning models for predicting stroke risk in hypertensive patients: lasso regression model, random forest model, Boruta algorithm model, and Boruta algorithm combined with lasso regression model</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2025</year>
          <volume>104</volume>
          <issue>22</issue>
          <fpage>e42690</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1097/MD.0000000000042690"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000042690</pub-id>
          <pub-id pub-id-type="medline">40441184</pub-id>
          <pub-id pub-id-type="pii">00005792-202505300-00003</pub-id>
          <pub-id pub-id-type="pmcid">PMC12129492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>King</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>GF</given-names>
            </name>
            <name name-style="western">
              <surname>Hochheiser</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Clermont</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hauskrecht</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Visweswaran</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using machine learning to predict the information seeking behavior of clinicians using an electronic medical record system</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2018</year>
          <volume>2018</volume>
          <fpage>673</fpage>
          <lpage>682</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30815109"/>
          </comment>
          <pub-id pub-id-type="medline">30815109</pub-id>
          <pub-id pub-id-type="pmcid">PMC6371238</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Comulada</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Step</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fletcher</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Tanner</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Dowshen</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Arayasirikul</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Keglovitz Baker</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zuniga</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Swendeman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Medich</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kao</surname>
              <given-names>UH</given-names>
            </name>
            <name name-style="western">
              <surname>Northrup</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nieto</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>RA</given-names>
            </name>
            <collab>Special Projects Of National Significance Social Media Initiative Study Group</collab>
          </person-group>
          <article-title>Predictors of internet health information-seeking behaviors among young adults living with HIV across the United States: longitudinal observational study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <volume>22</volume>
          <issue>11</issue>
          <fpage>e18309</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/11/e18309/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18309</pub-id>
          <pub-id pub-id-type="medline">33136057</pub-id>
          <pub-id pub-id-type="pii">v22i11e18309</pub-id>
          <pub-id pub-id-type="pmcid">PMC7669436</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Comulada</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Goldbeck</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Almirol</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gunn</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ocasio</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Fernández</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Arnold</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Romero-Espinoza</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Urauchi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ramos</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Rotheram-Borus</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Klausner</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Swendeman</surname>
              <given-names>D</given-names>
            </name>
            <collab>Adolescent Medicine Trials Network (ATN) CARES Team</collab>
          </person-group>
          <article-title>Using machine learning to predict young people's internet health and social service information seeking</article-title>
          <source>Prev Sci</source>
          <year>2021</year>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>1173</fpage>
          <lpage>1184</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33974226"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11121-021-01255-2</pub-id>
          <pub-id pub-id-type="medline">33974226</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11121-021-01255-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC8541921</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oladeji</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Moradi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tarapore</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Stokes</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Marivate</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Sengeh</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
          </person-group>
          <article-title>Monitoring information-seeking patterns and obesity prevalence in Africa with internet search data: observational study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2021</year>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>e24348</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2021/4/e24348/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24348</pub-id>
          <pub-id pub-id-type="medline">33913815</pub-id>
          <pub-id pub-id-type="pii">v7i4e24348</pub-id>
          <pub-id pub-id-type="pmcid">PMC8120431</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jiao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The machine learning model for distinguishing pathological subtypes of non-small cell lung cancer</article-title>
          <source>Front Oncol</source>
          <year>2022</year>
          <volume>12</volume>
          <fpage>875761</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35692759"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fonc.2022.875761</pub-id>
          <pub-id pub-id-type="medline">35692759</pub-id>
          <pub-id pub-id-type="pmcid">PMC9177952</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Nie</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Comparison of conventional logistic regression and machine learning methods for predicting delayed cerebral ischemia after aneurysmal subarachnoid hemorrhage: a multicentric observational cohort study</article-title>
          <source>Front Aging Neurosci</source>
          <year>2022</year>
          <volume>14</volume>
          <fpage>857521</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35783143"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fnagi.2022.857521</pub-id>
          <pub-id pub-id-type="medline">35783143</pub-id>
          <pub-id pub-id-type="pmcid">PMC9247265</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>MdM</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>MdM</given-names>
            </name>
            <name name-style="western">
              <surname>Yasmin</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>MdAR</given-names>
            </name>
            <name name-style="western">
              <surname>Zaman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Galibuzzaman</surname>
            </name>
            <name name-style="western">
              <surname>Islam</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Bairagi</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>A comparative assessment of machine learning algorithms with the least absolute shrinkage and selection operator for breast cancer detection and prediction</article-title>
          <source>Decis Anal J</source>
          <year>2023</year>
          <month>06</month>
          <volume>7</volume>
          <fpage>100245</fpage>
          <pub-id pub-id-type="doi">10.1016/j.dajour.2023.100245</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Siriwardane</surname>
              <given-names>EMD</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Generative design of inorganic compounds using deep diffusion language models</article-title>
          <source>J Phys Chem A</source>
          <year>2024</year>
          <volume>128</volume>
          <issue>29</issue>
          <fpage>5980</fpage>
          <lpage>5989</lpage>
          <pub-id pub-id-type="doi">10.1021/acs.jpca.4c00083</pub-id>
          <pub-id pub-id-type="medline">39008628</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Regression analysis and validation of risk factors for upper limb dysfunction following modified radical mastectomy for breast cancer patients</article-title>
          <source>Am J Transl Res</source>
          <year>2025</year>
          <volume>17</volume>
          <issue>4</issue>
          <fpage>2614</fpage>
          <lpage>2628</lpage>
          <pub-id pub-id-type="doi">10.62347/CZYA6232</pub-id>
          <pub-id pub-id-type="medline">40385071</pub-id>
          <pub-id pub-id-type="pmcid">PMC12082522</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Different MRI-based radiomics machine learning models to predict CD3+ tumor-infiltrating lymphocytes in rectal cancer</article-title>
          <source>Front Oncol</source>
          <year>2025</year>
          <volume>15</volume>
          <fpage>1509207</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fonc.2025.1509207"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fonc.2025.1509207</pub-id>
          <pub-id pub-id-type="medline">40356764</pub-id>
          <pub-id pub-id-type="pmcid">PMC12066337</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heinze</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wallisch</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dunkler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Variable selection - a review and recommendations for the practicing statistician</article-title>
          <source>Biom J</source>
          <year>2018</year>
          <volume>60</volume>
          <issue>3</issue>
          <fpage>431</fpage>
          <lpage>449</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29292533"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/bimj.201700067</pub-id>
          <pub-id pub-id-type="medline">29292533</pub-id>
          <pub-id pub-id-type="pmcid">PMC5969114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Harrell</surname>
              <given-names>FE</given-names>
            </name>
            <name name-style="western">
              <surname>Borsboom</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Eijkemans</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Vergouwe</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Habbema</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Internal validation of predictive models: efficiency of some procedures for logistic regression analysis</article-title>
          <source>J Clin Epidemiol</source>
          <year>2001</year>
          <volume>54</volume>
          <issue>8</issue>
          <fpage>774</fpage>
          <lpage>781</lpage>
          <pub-id pub-id-type="doi">10.1016/s0895-4356(01)00341-9</pub-id>
          <pub-id pub-id-type="medline">11470385</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(01)00341-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Consumer health information seeking in social media: a literature review</article-title>
          <source>Health Info Libr J</source>
          <year>2017</year>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>268</fpage>
          <lpage>283</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.1111/hir.12192"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/hir.12192</pub-id>
          <pub-id pub-id-type="medline">29045011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>The impact of social media on guideline-concordant cervical cancer-screening: insights from a national survey</article-title>
          <source>Public Health</source>
          <year>2023</year>
          <volume>223</volume>
          <fpage>50</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.1016/j.puhe.2023.07.025</pub-id>
          <pub-id pub-id-type="medline">37598576</pub-id>
          <pub-id pub-id-type="pii">S0033-3506(23)00265-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Demiris</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Interventions using social media for cancer prevention and management: a systematic review</article-title>
          <source>Cancer Nurs</source>
          <year>2018</year>
          <volume>41</volume>
          <issue>6</issue>
          <fpage>E19</fpage>
          <lpage>E31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28753192"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/NCC.0000000000000534</pub-id>
          <pub-id pub-id-type="medline">28753192</pub-id>
          <pub-id pub-id-type="pmcid">PMC5787052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazard</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Meernik</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>MKR</given-names>
            </name>
            <name name-style="western">
              <surname>Vereen</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Benedict</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Valle</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Love</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Social media use for cancer support among young adults with cancer</article-title>
          <source>J Adolesc Young Adult Oncol</source>
          <year>2023</year>
          <volume>12</volume>
          <issue>5</issue>
          <fpage>674</fpage>
          <lpage>684</lpage>
          <pub-id pub-id-type="doi">10.1089/jayao.2023.0025</pub-id>
          <pub-id pub-id-type="medline">37257189</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aristokleous</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Karakatsanis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Masannat</surname>
              <given-names>YA</given-names>
            </name>
            <name name-style="western">
              <surname>Kastora</surname>
              <given-names>SL</given-names>
            </name>
          </person-group>
          <article-title>The role of social media in breast cancer care and survivorship: a narrative review</article-title>
          <source>Breast Care (Basel)</source>
          <year>2023</year>
          <volume>18</volume>
          <issue>3</issue>
          <fpage>193</fpage>
          <lpage>199</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1159/000531136"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000531136</pub-id>
          <pub-id pub-id-type="medline">37404835</pub-id>
          <pub-id pub-id-type="pii">531136</pub-id>
          <pub-id pub-id-type="pmcid">PMC10314991</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Longfellow</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Ormseth</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Skolnick</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Politi</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Rivera</surname>
              <given-names>YM</given-names>
            </name>
            <name name-style="western">
              <surname>Myckatyn</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Social media as a platform for cancer care decision-making among women: internet survey-based study on trust, engagement, and preferences</article-title>
          <source>JMIR Cancer</source>
          <year>2025</year>
          <volume>11</volume>
          <fpage>e64724</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancer.jmir.org/2025/1/e64724/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/64724</pub-id>
          <pub-id pub-id-type="medline">40053770</pub-id>
          <pub-id pub-id-type="pii">v11i1e64724</pub-id>
          <pub-id pub-id-type="pmcid">PMC11923483</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gaysynsky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Senft Everson</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Heley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>WS</given-names>
            </name>
          </person-group>
          <article-title>Perceptions of health misinformation on social media: cross-sectional survey study</article-title>
          <source>JMIR Infodemiol</source>
          <year>2024</year>
          <volume>4</volume>
          <fpage>e51127</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://infodemiology.jmir.org/2024/1/e51127/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51127</pub-id>
          <pub-id pub-id-type="medline">38687591</pub-id>
          <pub-id pub-id-type="pii">v4i1e51127</pub-id>
          <pub-id pub-id-type="pmcid">PMC11094599</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stimpson</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pruitt</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Ortega</surname>
              <given-names>AN</given-names>
            </name>
          </person-group>
          <article-title>Variation in trust in cancer information sources by perceptions of social media health mis- and disinformation and by race and ethnicity among adults in the United States: cross-sectional study</article-title>
          <source>JMIR Cancer</source>
          <year>2024</year>
          <volume>10</volume>
          <fpage>e54162</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancer.jmir.org/2024/1/e54162/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/54162</pub-id>
          <pub-id pub-id-type="medline">38717800</pub-id>
          <pub-id pub-id-type="pii">v10i1e54162</pub-id>
          <pub-id pub-id-type="pmcid">PMC11112477</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gapstur</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Bandera</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Jernigan</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>LoConte</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Southwell</surname>
              <given-names>BG</given-names>
            </name>
            <name name-style="western">
              <surname>Vasiliou</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Brewster</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Naimi</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Scherr</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Shield</surname>
              <given-names>KD</given-names>
            </name>
          </person-group>
          <article-title>Alcohol and cancer: existing knowledge and evidence gaps across the cancer continuum</article-title>
          <source>Cancer Epidemiol Biomarkers Prev</source>
          <year>2022</year>
          <volume>31</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34728469"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/1055-9965.EPI-21-0934</pub-id>
          <pub-id pub-id-type="medline">34728469</pub-id>
          <pub-id pub-id-type="pii">1055-9965.EPI-21-0934</pub-id>
          <pub-id pub-id-type="pmcid">PMC8755600</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seidenberg</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Wiseman</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>WMP</given-names>
            </name>
          </person-group>
          <article-title>Do beliefs about alcohol and cancer risk vary by alcoholic beverage type and heart disease risk beliefs?</article-title>
          <source>Cancer Epidemiol Biomarkers Prev</source>
          <year>2023</year>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>46</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36453075"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/1055-9965.EPI-22-0420</pub-id>
          <pub-id pub-id-type="medline">36453075</pub-id>
          <pub-id pub-id-type="pii">711323</pub-id>
          <pub-id pub-id-type="pmcid">PMC9839574</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kokole</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ferreira-Borges</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Galea</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rehm</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Neufeld</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Public awareness of the alcohol-cancer link in the EU and UK: a scoping review</article-title>
          <source>Eur J Public Health</source>
          <year>2023</year>
          <volume>33</volume>
          <issue>6</issue>
          <fpage>1128</fpage>
          <lpage>1147</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37802887"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/eurpub/ckad141</pub-id>
          <pub-id pub-id-type="medline">37802887</pub-id>
          <pub-id pub-id-type="pii">7295464</pub-id>
          <pub-id pub-id-type="pmcid">PMC10710347</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rohde</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>WMP</given-names>
            </name>
            <name name-style="western">
              <surname>D'Angelo</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Alcohol and cancer risk beliefs as correlates of alcohol consumption status</article-title>
          <source>Am J Prev Med</source>
          <year>2023</year>
          <volume>65</volume>
          <issue>6</issue>
          <fpage>1181</fpage>
          <lpage>1183</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2023.06.012</pub-id>
          <pub-id pub-id-type="medline">37364661</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(23)00275-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC10749373</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mavadiya</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Diet-related awareness and behaviours in cancer survivors compared with non-cancer individuals: a pooled analysis of the HINTS study</article-title>
          <source>Public Health Nutr</source>
          <year>2025</year>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>e102</fpage>
          <pub-id pub-id-type="doi">10.1017/S1368980025100505</pub-id>
          <pub-id pub-id-type="medline">40457752</pub-id>
          <pub-id pub-id-type="pii">S1368980025100505</pub-id>
          <pub-id pub-id-type="pmcid">PMC12264775</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sommers</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dizon</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Andreoli</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Henderson</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Assessing health information seeking behaviors among targeted social media users using an infotainment video about a cancer clinical trial: population-based descriptive study</article-title>
          <source>JMIR Cancer</source>
          <year>2025</year>
          <volume>11</volume>
          <fpage>e56098</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancer.jmir.org/2025/1/e56098/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/56098</pub-id>
          <pub-id pub-id-type="medline">40029972</pub-id>
          <pub-id pub-id-type="pii">v11i1e56098</pub-id>
          <pub-id pub-id-type="pmcid">PMC11892945</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loeb</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Langford</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Bragg</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Sherman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Cancer misinformation on social media</article-title>
          <source>CA Cancer J Clin</source>
          <year>2024</year>
          <volume>74</volume>
          <issue>5</issue>
          <fpage>453</fpage>
          <lpage>464</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.3322/caac.21857"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/caac.21857</pub-id>
          <pub-id pub-id-type="medline">38896503</pub-id>
          <pub-id pub-id-type="pmcid">PMC11648589</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="web">
          <source>National Cancer Institute</source>
          <access-date>2026-03-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hints.cancer.gov/data/default.aspx">https://hints.cancer.gov/data/default.aspx</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
