<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e73765</article-id><article-id pub-id-type="doi">10.2196/73765</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Predicting Lymph Node Metastasis in Rectal Cancer: Development and Validation of a Machine Learning Model Using Clinical Data</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Hou</surname><given-names>Wei</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Li</surname><given-names>Chuangwei</given-names></name><degrees>MM</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Wang</surname><given-names>Zhen</given-names></name><degrees>MB</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Wanqin</given-names></name><degrees>MM</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wan</surname><given-names>Shouhong</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Zou</surname><given-names>Bingbing</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Department of General Surgery, The First Affiliated Hospital of Anhui Medical University</institution><addr-line>Hefei, Anhui</addr-line><country>China</country></aff><aff id="aff2"><institution>Institute of Artificial Intelligence, Hefei Comprehensive National Science Center</institution><addr-line>Hefei</addr-line><country>China</country></aff><aff id="aff3"><institution>Anhui Medical University</institution><addr-line>Hefei</addr-line><country>China</country></aff><aff id="aff4"><institution>Department of Radiology, The First Affiliated Hospital of Anhui Medical University</institution><addr-line>Hefei</addr-line><country>China</country></aff><aff id="aff5"><institution>School of Computer Science and Technology, University of Science and Technology of China</institution><addr-line>Hefei</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name 
name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Yin</surname><given-names>Chengliang</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Hj&#x00E4;rtstr&#x00F6;m</surname><given-names>Malin</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Chen</surname><given-names>Ting</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Bingbing Zou, PhD, Department of General Surgery, The First Affiliated Hospital of Anhui Medical University, Hefei, Anhui, 230022, China; <email>zoubingbing@ahmu.edu.cn</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>23</day><month>9</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e73765</elocation-id><history><date date-type="received"><day>11</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>29</day><month>07</month><year>2025</year></date><date date-type="accepted"><day>30</day><month>07</month><year>2025</year></date></history><copyright-statement>&#x00A9; Wei Hou, Chuangwei Li, Zhen Wang, Wanqin Wang, Shouhong Wan, Bingbing Zou. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 23.9.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e73765"/><abstract><sec><title>Background</title><p>Rectal cancer (RC) is a common malignant tumor, with lymph node metastasis (LNM) being a critical determinant of patient prognosis. Traditional diagnostic methods have limitations, necessitating the development of predictive models using clinical data.</p></sec><sec><title>Objective</title><p>This study aimed to construct and validate machine learning (ML) models to predict LNM risk in patients with RC based on clinical data.</p></sec><sec sec-type="methods"><title>Methods</title><p>Retrospective data from 2454 patients with RC (SEER [Surveillance, Epidemiology, and End Results] database) were split into training (n=1954) and internal validation (n=500) sets. An external cohort (n=500) was obtained from the First Affiliated Hospital of Anhui Medical University. Lymph node features identified via computed tomographic scans were integrated with clinicopathological data. 
Variables were selected using LASSO (Least Absolute Shrinkage and Selection Operator), followed by univariate and multivariate logistic regression. Eleven ML models (Logistic Regression, K-Nearest Neighbors, Extremely Randomized Trees, Naive Bayes, XGBoost [XGB], Light Gradient Boosting Machine, Multilayer Perceptron, Gradient Boosting, Support Vector Machine, Random Forest, and Ada-Boost) were evaluated via area under the receiver operating characteristic curve (AUC), calibration curves, and decision curve analysis.</p></sec><sec sec-type="results"><title>Results</title><p>LNM prevalence was 26.9% (training), 27% (internal validation), and 81% (external validation). Independent LNM predictors included tumor grade, clinical T stage, N stage, tumor length, neural invasion, and total lymph nodes. Internal validation AUC ranged from 0.859 to 0.964; external validation AUC was 0.735&#x2010;0.838. In the internal validation set, Random Forest and Extremely Randomized Trees achieved the highest AUC (0.964, 95% CI 0.950&#x2010;0.978), while XGB demonstrated superior cross-cohort stability (AUC 0.942, 95% CI 0.925&#x2010;0.959). For external validation, Gradient Boosting had the highest AUC (0.838, 95% CI 0.801&#x2010;0.875), followed by XGB (0.832, 95% CI 0.794&#x2010;0.869). XGB showed minimal calibration error with curves closest to the ideal diagonal and yielded the highest net benefit in decision curve analysis across critical thresholds.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study successfully developed and validated 11 ML models to predict LNM risk in RC. The XGB model was optimal; overall, 10 models achieved an AUC &#x003E;0.9 in internal validation and 7 models achieved an AUC &#x003E;0.8 in external validation. 
The identified predictors of LNM can facilitate early diagnosis and personalized treatment, highlighting the potential of integrating computed tomographic scan data with clinicopathological findings to build effective predictive models.</p></sec><sec><title>Trial Registration</title><p>Chinese Clinical Trial Registry ChiCTR2400094858; https://www.chictr.org.cn/showproj.html?proj=254325</p></sec></abstract><kwd-group><kwd>rectal cancer</kwd><kwd>lymph node metastasis</kwd><kwd>machine learning model</kwd><kwd>prediction models</kwd><kwd>clinical data</kwd><kwd>prognosis</kwd><kwd>XGBoost</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Rectal cancer (RC) is among the most prevalent malignant tumors globally and is currently the second leading cause of cancer-related deaths worldwide [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. A recent study [<xref ref-type="bibr" rid="ref3">3</xref>] projects that by 2040, the incidence of RC will rise to 3.2 million new cases annually, with 1.6 million deaths worldwide. Lymph node metastasis (LNM) is a critical determinant of poor prognosis in RC, and numerous studies [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>] have shown that accurate prediction of LNM is crucial for treatment selection and prognostic assessment in patients. First, while current imaging technologies can assess the risk of LNM to some degree [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>], they largely depend on radiologists&#x2019; anatomical expertise for evaluation [<xref ref-type="bibr" rid="ref11">11</xref>]. However, these methods still face limitations in terms of accuracy and efficiency [<xref ref-type="bibr" rid="ref12">12</xref>]. 
Specifically, conventional computed tomography (CT)&#x2013;based N staging struggles to detect small metastatic lymph nodes (&#x003C;5 mm) and subtle morphological changes (eg, irregular borders, heterogeneous texture), which are critical for early metastasis diagnosis but require expert manual annotation [<xref ref-type="bibr" rid="ref13">13</xref>]. Second, pathologists determine the presence of LNM in patients with RC based on clinical pathology reports. This approach is inefficient and subject to the constraints of the individual pathologist&#x2019;s expertise.</p><p>In recent years, the rapid advancement of machine learning (ML) technology has led to its growing application in the medical field, particularly in disease prediction, diagnosis, and treatment decision-making, demonstrating significant potential [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. ML algorithms can process and analyze large volumes of clinical data, identifying disease-related patterns and features, which enhances the accuracy of predictive models [<xref ref-type="bibr" rid="ref16">16</xref>]. However, most existing ML models rely on automated radiomic features or clinical data alone, neglecting the value of radiologist-annotated CT morphological features [<xref ref-type="bibr" rid="ref17">17</xref>]. In predicting LNM in RC, previous studies have utilized ML models, including Support Vector Machines, neural networks, and decision trees, achieving notable results [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. Both clinical data and CT results are not entirely accurate in predicting pathological LNM. In the study by Li et al [<xref ref-type="bibr" rid="ref22">22</xref>], the overall accuracy of N stage based on CT images ranged from 59% to 68%. Meanwhile, up to 70% of metastatic lymph nodes in colorectal cancer have a diameter of less than 5 mm. 
This indicates that both clinicopathological features and preoperative CT have certain limitations in predicting the malignant lymph node status of patients with T1 colorectal cancer. Therefore, there is an urgent need to develop novel predictive tools that integrate ML with radiological parameters and clinical data, thereby enhancing the accuracy and efficiency of diagnostic processes. Recent evidence suggests that systematic manual annotation by trained radiologists can improve the sensitivity of detecting sub-centimeter metastatic lymph nodes compared with routine clinical reports. This highlights the potential of this method in bridging current diagnostic gaps [<xref ref-type="bibr" rid="ref23">23</xref>].</p><p>This study aimed to develop and validate ML models using clinical data to predict LNM risk in RC. We successfully constructed and validated 11 predictive models integrating radiologist-annotated CT lymph node features with clinicopathological data. These models show significant potential to assist clinicians in early diagnosis and personalized treatment planning. We anticipate that the findings will provide new insights for RC management and serve as a valuable reference for future research and practice.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Data Collection and Inclusion Criteria</title><p>This study encompassed clinical data from 2454 patients with RC in the SEER database, wherein 1954 cases were randomly selected to form the training cohort, and the remaining 500 formed the internal validation cohort. Additionally, it included data from 500 patients with RC treated at the author&#x2019;s hospital between January 1, 2017, and December 31, 2023, which served as the external validation cohort. 
The inclusion criteria were (1) patients with RC staged as I-IV according to the American Joint Committee on Cancer (AJCC) staging system; (2) those who underwent curative surgery; (3) those with complete clinical and pathological information; and (4) those with stage IV disease who underwent radical surgery. In this study, some patients with stage IV RC with oligometastasis (limited resectable metastases) or locally advanced tumors with resectable metastatic lesions underwent radical surgery after evaluation by a multidisciplinary team. The surgical decision was based on the multidisciplinary team&#x2019;s comprehensive assessment of tumor burden, physical condition, expected survival, and surgical risks. The goal was to achieve R0 resection and combine it with postoperative adjuvant therapy to reduce recurrence risk. Data for these patients were complete and underwent strict screening to ensure they met the research criteria. The exclusion criteria included (1) a history of other malignant tumors, (2) inability to assess lymph node status, (3) incomplete clinical information, and (4) administration of neoadjuvant therapy. The LNM status for 2454 patients with RC in the SEER database was ascertained from pathological assessments of surgical specimens. For the 500 cases at the authors&#x2019; hospital, LNM status, clinical T and N staging, and tumor length were determined by precisely annotating and measuring contrast-enhanced CT images with the radiological software ITK-SNAP (version 4.0.1; Professor Paul Yushkevich&#x2019;s team, University of Pennsylvania, USA), corroborated by pathological assessments from surgical specimens. For example, tumor length in the SEER database (training/internal validation cohorts) was measured from pathological specimens. 
In the external validation cohort, tumor length was annotated and measured on contrast-enhanced CT scans using ITK-SNAP software by radiologists, with cross-validation against postoperative pathology (mean error &#x003C;0.3 cm).</p></sec><sec id="s2-2"><title>Clinical Pathological Features</title><p>The study collected the following clinical and pathological characteristics of the patients: sex, age, perineural invasion, carcinoembryonic antigen (CEA), tumor length, clinical T stage, N stage, tumor differentiation, chemotherapy and radiotherapy administration, liver metastasis status, and tumor histology. For the training and internal validation cohorts, data extraction was conducted using the SEER*STAT software, version 8.4.3 (Surveillance, Epidemiology, and End Results Program, National Cancer Institute, USA). In the external validation cohort, clinical data were independently collected and processed by 2 researchers, while CT images were annotated and measured with precision by 50 physicians, evenly distributed into 25 groups, each consisting of 2 physicians for cross-validation purposes. The inclusion criteria for metastatic lymph nodes were fulfilled if any of the following five conditions was met: (1) the short axis/long axis ratio was &#x2265;0.8; (2) the short axis was &#x2265;5 mm; (3) there was aggregation of three or more lymph nodes; (4) the lymph node had an irregular shape with a rough margin; (5) the lymph node signal was inhomogeneous, with high signal areas in the CT imaging for lymph nodes (including mesenteric and presacral lymph nodes). <xref ref-type="fig" rid="figure1">Figure 1</xref> illustrates the acquisition process.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Workflow of this study. 
AB: Ada-Boost; CT: computed tomography; ET: Extremely Randomized Trees; GB: Gradient Boosting; KNN: K-Nearest Neighbors; LGBM: Light Gradient Boosting Machine; LR: Logistic Regression; MLP: Multilayer Perceptron; NB: Naive Bayes; RF: Random Forest; ROC: receiver operating characteristic; SEER: Surveillance, Epidemiology, and End Results; SVM: Support Vector Machine; XGB: XGBoost.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73765_fig01.png"/></fig></sec><sec id="s2-3"><title>Data Processing and Analysis</title><p>All collected data were subjected to rigorous preprocessing in Python, encompassing data cleaning, outlier detection, and missing value imputation. Subsequent statistical analysis used suitable methods, including <italic>&#x03C7;</italic><sup>2</sup> tests, ANOVA, and both univariate and multivariate regression analyses, to evaluate the correlations between clinicopathological features and LNM.</p></sec><sec id="s2-4"><title>Feature Engineering and Selection</title><p>In this study, we standardized numerical features, including tumor length, to mitigate the effects of varying scales and enhance their compatibility with the input requirements of ML models. For categorical variables like clinical T staging, we applied label encoding to transform them into numerical data. All features were first normalized to eliminate scale effects (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). We then performed univariate feature screening via independent <italic>t</italic> tests or Mann-Whitney <italic>U</italic> tests, retaining features with <italic>P</italic>&#x2264;.05. Next, Pearson correlation analysis reduced redundancy by removing features with mean absolute correlation &#x003E;0.9, yielding decorrelated features. 
Finally, features with nonzero coefficients were selected through LASSO regression (10-fold cross-validated) with regularization parameter &#x03B1;. Notably, this LASSO-based selection served as initial screening; tree-based models (Random Forest [RF]/XGBoost [XGB]/Light Gradient Boosting Machine [LGBM]) subsequently performed endogenous feature reweighting to capture nonlinear interactions during model training. LASSO incorporates an L1 regularization term that aids in pinpointing the most influential features while promoting model sparsity. Furthermore, we performed a correlation analysis among the features to verify the independence of the selected variables. Utilizing the principle of permutation importance, we evaluated the importance of each feature to ascertain its contribution to the model&#x2019;s predictive performance.</p></sec><sec id="s2-5"><title>Predictive Model Construction and Validation</title><p>This study developed 11 distinct ML models: Logistic Regression (LR), Naive Bayes (NB), Support Vector Machine, K-Nearest Neighbors (KNN), RF, Extremely Randomized Trees (ET), XGB, LGBM, Gradient Boosting (GB), Ada-Boost (AB), and Multilayer Perceptron (MLP), for predicting the risk of LNM in patients with RC (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). To counteract the imbalance in data distribution, we used random oversampling techniques and applied a penalty parameter &#x03BB; to drive the coefficient estimates of nonsignificant features toward zero. The models&#x2019; performance was evaluated through 10-fold cross-validation. The performance of each ML classifier was assessed using the receiver operating characteristic curve, where a higher area under the receiver operating characteristic curve (AUC) indicates greater predictive accuracy. We evaluated the variable weights and permutation importance and used heatmaps to visualize the significance and correlations among the variables. 
To select the optimal model, we evaluated each model&#x2019;s performance across multiple metrics: AUC (with 95% CI), accuracy, sensitivity, specificity, and F<sub>1</sub> score. Calibration curves and decision curve analysis (DCA) were further employed to assess calibration accuracy and clinical utility, particularly focusing on net benefits at critical thresholds.</p></sec><sec id="s2-6"><title>Ethical Considerations</title><p>This study was approved by the Ethics Committee of the First Affiliated Hospital of Anhui Medical University (Approval No.: Quick-PJ 2023-13-34). As a retrospective analysis using anonymized data, the requirement for informed consent was waived. Primary data collection in the SEER database obtained patient informed consent, and its Institutional Review Board explicitly authorized secondary analysis without additional approval. All direct identifiers (including names, ID numbers, medical record numbers) were removed from our hospital&#x2019;s dataset, with only aggregated data utilized in the analyses to ensure privacy protection. No participant compensation was involved, and no identifiable images appear in the manuscript or supplementary materials.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Demographic Characteristics and Parameter Selection</title><p>In our study, the training cohort included 1954 patients with RC, the internal validation cohort included 500 patients with RC, and the external validation cohort included 500 patients with RC. Across the training cohort, internal validation cohort, and external validation cohort, stratification by LNM status (positive vs negative) revealed statistically significant differences in age, gender, total lymph node count, tumor length, neural invasion, clinical T stage, N stage, liver metastasis status, tumor histology, and tumor differentiation (all <italic>P</italic>&#x003C;.001; see <xref ref-type="table" rid="table1">Table 1</xref>). 
For CEA, the <italic>P</italic> value was .001, indicating a statistically significant difference (<xref ref-type="table" rid="table1">Table 1</xref>). We used the LASSO regression method to identify a significant set of risk factors for predicting the risk of LNM in patients with RC.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>The clinicopathological features of the training, internal validation, and external validation cohorts.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Characteristics</td><td align="left" valign="bottom" colspan="2">Training cohort<break/>(N=1954)</td><td align="left" valign="bottom" colspan="2">Internal validation cohort<break/>(N=500)</td><td align="left" valign="bottom" colspan="2">External validation cohort<break/>(N=500)</td><td align="left" valign="bottom" rowspan="2"><italic>P</italic> value</td></tr><tr><td align="left" valign="bottom">Negative (n=1421)</td><td align="left" valign="bottom">Positive (n=533)</td><td align="left" valign="bottom">Negative (n=365)</td><td align="left" valign="bottom">Positive (n=135)</td><td align="left" valign="bottom">Negative (n=195)</td><td align="left" valign="bottom">Positive (n=305)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="8">Sex, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">543 (38.2)</td><td align="left" valign="top">197 (37.0)</td><td align="left" valign="top">193 (53.0)</td><td align="left" valign="top">44 (33.0)</td><td align="left" valign="top">75 (38.0)</td><td align="left" valign="top">101 (33.0)</td><td align="left" valign="top" rowspan="2">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">878 (62.0)</td><td 
align="left" valign="top">336 (63.0)</td><td align="left" valign="top">172 (47.0)</td><td align="left" valign="top">91 (67.0)</td><td align="left" valign="top">120 (62.0)</td><td align="left" valign="top">204 (67.0)</td></tr><tr><td align="left" valign="top" colspan="8">Age, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>20&#x2264;X&#x2264;24</td><td align="left" valign="top">2 (0.1)</td><td align="left" valign="top">2 (0.4)</td><td align="left" valign="top">1 (0.3)</td><td align="left" valign="top">2 (1.5)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top" rowspan="14">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>25&#x2264;X&#x2264;29</td><td align="left" valign="top">8 (0.6)</td><td align="left" valign="top">4 (0.8)</td><td align="left" valign="top">6 (1.6)</td><td align="left" valign="top">2 (1.5)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">1 (0.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>30&#x2264;X&#x2264;34</td><td align="left" valign="top">20 (1.4)</td><td align="left" valign="top">10 (1.9)</td><td align="left" valign="top">11 (3.0)</td><td align="left" valign="top">3 (2.2)</td><td align="left" valign="top">1 (0.5)</td><td align="left" valign="top">3 (1.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>35&#x2264;X&#x2264;39</td><td align="left" valign="top">36 (2.5)</td><td align="left" valign="top">14 (2.6)</td><td align="left" valign="top">9 (2.5)</td><td align="left" valign="top">6 (4.4)</td><td align="left" valign="top">2 (1.0)</td><td align="left" valign="top">3 (1.0)</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>40&#x2264;X&#x2264;44</td><td align="left" valign="top">63 (4.4)</td><td align="left" valign="top">27 (5.1)</td><td align="left" valign="top">25 (6.9)</td><td align="left" valign="top">18 (13.3)</td><td align="left" valign="top">5 (2.6)</td><td align="left" valign="top">6 (2.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>45&#x2264;X&#x2264;49</td><td align="left" valign="top">97 (6.8)</td><td align="left" valign="top">59 (11.1)</td><td align="left" valign="top">57 (15.6)</td><td align="left" valign="top">16 (11.9)</td><td align="left" valign="top">14 (7.2)</td><td align="left" valign="top">28 (9.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>50&#x2264;X&#x2264;54</td><td align="left" valign="top">181 (13.0)</td><td align="left" valign="top">76 (14.3)</td><td align="left" valign="top">54 (14.8)</td><td align="left" valign="top">19 (14.1)</td><td align="left" valign="top">21 (10.8)</td><td align="left" valign="top">39 (12.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>55&#x2264;X&#x2264;59</td><td align="left" valign="top">211 (15.0)</td><td align="left" valign="top">69 (13.0)</td><td align="left" valign="top">46 (12.6)</td><td align="left" valign="top">17 (12.6)</td><td align="left" valign="top">35 (18.0)</td><td align="left" valign="top">40 (13.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>60&#x2264;X&#x2264;64</td><td align="left" valign="top">216 (15.0)</td><td align="left" valign="top">75 (14.1)</td><td align="left" valign="top">57 (15.6)</td><td align="left" valign="top">12 (8.9)</td><td align="left" valign="top">28 (14.4)</td><td 
align="left" valign="top">28 (9.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>65&#x2264;X&#x2264;69</td><td align="left" valign="top">217 (15.0)</td><td align="left" valign="top">63 (11.8)</td><td align="left" valign="top">42 (11.5)</td><td align="left" valign="top">15 (11.1)</td><td align="left" valign="top">25 (12.8)</td><td align="left" valign="top">64 (21.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>70&#x2264;X&#x2264;74</td><td align="left" valign="top">164 (12.0)</td><td align="left" valign="top">52 (9.8)</td><td align="left" valign="top">29 (8.0)</td><td align="left" valign="top">10 (7.4)</td><td align="left" valign="top">27 (13.9)</td><td align="left" valign="top">48 (15.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>75&#x2264;X&#x2264;79</td><td align="left" valign="top">107 (8.0)</td><td align="left" valign="top">33 (6.2)</td><td align="left" valign="top">17 (4.7)</td><td align="left" valign="top">10 (7.4)</td><td align="left" valign="top">24 (12.3)</td><td align="left" valign="top">29 (9.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>80&#x2264;X&#x2264;84</td><td align="left" valign="top">63 (4.4)</td><td align="left" valign="top">26 (4.9)</td><td align="left" valign="top">11 (3.0)</td><td align="left" valign="top">5 (3.7)</td><td align="left" valign="top">10 (5.1)</td><td align="left" valign="top">14 (4.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>85&#x2264;X</td><td align="left" valign="top">36 (2.5)</td><td align="left" valign="top">23 (4.3)</td><td align="left" valign="top">1 (0.3)</td><td align="left" valign="top">2 (1.5)</td><td 
align="left" valign="top">3 (1.5)</td><td align="left" valign="top">2 (0.7)</td></tr><tr><td align="left" valign="top" colspan="8">Total number of lymph nodes, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>0&#x003C;X&#x2264;5</td><td align="left" valign="top">89 (6.3)</td><td align="left" valign="top">17 (3.2)</td><td align="left" valign="top">20 (5.5)</td><td align="left" valign="top">7 (5.2)</td><td align="left" valign="top">3 (1.5)</td><td align="left" valign="top">4 (1.3)</td><td align="left" valign="top" rowspan="7">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5&#x003C;X&#x2264;10</td><td align="left" valign="top">130 (9.0)</td><td align="left" valign="top">40 (7.5)</td><td align="left" valign="top">24 (6.6)</td><td align="left" valign="top">10 (7.4)</td><td align="left" valign="top">36 (18.5)</td><td align="left" valign="top">30 (9.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>10&#x003C;X&#x2264;15</td><td align="left" valign="top">455 (32.0)</td><td align="left" valign="top">148 (28.0)</td><td align="left" valign="top">113 (31.0)</td><td align="left" valign="top">40 (29.6)</td><td align="left" valign="top">130 (67.0)</td><td align="left" valign="top">211 (69.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>15&#x003C;X&#x2264;20</td><td align="left" valign="top">319 (22.0)</td><td align="left" valign="top">131 (25.0)</td><td align="left" valign="top">87 (23.9)</td><td align="left" valign="top">36 (26.7)</td><td align="left" valign="top">23 (11.8)</td><td align="left" valign="top">53 (17.4)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>20&#x003C;X&#x2264;25</td><td align="left" valign="top">186 (13.0)</td><td align="left" valign="top">77 (14.5)</td><td align="left" valign="top">62 (17.0)</td><td align="left" valign="top">15 (11.1)</td><td align="left" valign="top">3 (1.5)</td><td align="left" valign="top">6 (2.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>25&#x003C;X&#x2264;30</td><td align="left" valign="top">106 (7.0)</td><td align="left" valign="top">50 (9.4)</td><td align="left" valign="top">28 (7.7)</td><td align="left" valign="top">12 (8.9)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">1 (0.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>30&#x003C;X</td><td align="left" valign="top">134 (9.0)</td><td align="left" valign="top">70 (13.1)</td><td align="left" valign="top">30 (8.2)</td><td align="left" valign="top">15 (11.1)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top" colspan="8">Tumor length, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>0&#x003C;X&#x2264;5</td><td align="left" valign="top">883 (62.0)</td><td align="left" valign="top">331 (62.0)</td><td align="left" valign="top">219 (60.0)</td><td align="left" valign="top">81 (60.0)</td><td align="left" valign="top">73 (37.4)</td><td align="left" valign="top">123 (40.0)</td><td align="left" valign="top" rowspan="3">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5&#x003C;X&#x2264;10</td><td align="left" valign="top">504 (35.0)</td><td align="left" valign="top">191 (36.0)</td><td align="left" valign="top">138 (38.0)</td><td 
align="left" valign="top">54 (40.0)</td><td align="left" valign="top">118 (61.0)</td><td align="left" valign="top">168 (55.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>10&#x003C;X</td><td align="left" valign="top">34 (2.4)</td><td align="left" valign="top">11 (2.1)</td><td align="left" valign="top">8 (2.2)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">4 (2.1)</td><td align="left" valign="top">14 (4.6)</td></tr><tr><td align="left" valign="top">PNI<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Negative</td><td align="left" valign="top">1320 (93.0)</td><td align="left" valign="top">398 (75.0)</td><td align="left" valign="top">339 (93.0)</td><td align="left" valign="top">102 (76.0)</td><td align="left" valign="top">125 (64.0)</td><td align="left" valign="top">176 (58.0)</td><td align="left" valign="top" rowspan="2">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Positive</td><td align="left" valign="top">101 (7.0)</td><td align="left" valign="top">135 (25.0)</td><td align="left" valign="top">26 (7.1)</td><td align="left" valign="top">33 (24.4)</td><td align="left" valign="top">70 (35.9)</td><td align="left" valign="top">129 (42.3)</td></tr><tr><td align="left" valign="top">CEA<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup>, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td 
align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Negative</td><td align="left" valign="top">933 (66.0)</td><td align="left" valign="top">305 (57.0)</td><td align="left" valign="top">236 (65.0)</td><td align="left" valign="top">77 (57.0)</td><td align="left" valign="top">145 (74.4)</td><td align="left" valign="top">208 (68.0)</td><td align="left" valign="top" rowspan="2">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Positive</td><td align="left" valign="top">488 (34.0)</td><td align="left" valign="top">228 (43.0)</td><td align="left" valign="top">129 (35.0)</td><td align="left" valign="top">58 (43.0)</td><td align="left" valign="top">50 (25.6)</td><td align="left" valign="top">97 (31.8)</td></tr><tr><td align="left" valign="top" colspan="8">Clinical T stage, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td><td align="left" valign="top">220 (15.0)</td><td align="left" valign="top">37 (6.9)</td><td align="left" valign="top">41 (11.2)</td><td align="left" valign="top">8 (5.9)</td><td align="left" valign="top">22 (11.3)</td><td align="left" valign="top">26 (8.5)</td><td align="left" valign="top" rowspan="4">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td><td align="left" valign="top">243 (17.1)</td><td align="left" valign="top">70 (13.1)</td><td align="left" valign="top">80 (21.9)</td><td align="left" valign="top">14 (10.4)</td><td align="left" valign="top">62 (31.8)</td><td align="left" valign="top">66 (21.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3</td><td align="left" 
valign="top">778 (55.0)</td><td align="left" valign="top">339 (64.0)</td><td align="left" valign="top">192 (53.0)</td><td align="left" valign="top">89 (65.9)</td><td align="left" valign="top">92 (47.2)</td><td align="left" valign="top">166 (54)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4</td><td align="left" valign="top">179 (13.0)</td><td align="left" valign="top">87 (16.3)</td><td align="left" valign="top">52 (14.3)</td><td align="left" valign="top">24 (17.8)</td><td align="left" valign="top">19 (9.7)</td><td align="left" valign="top">47 (15.4)</td></tr><tr><td align="left" valign="top" colspan="8">N stage, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>0</td><td align="left" valign="top">879 (62.0)</td><td align="left" valign="top">1 (0.2)</td><td align="left" valign="top">227 (62.0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">116 (59.0)</td><td align="left" valign="top">141 (46.0)</td><td align="left" valign="top" rowspan="3">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td><td align="left" valign="top">542 (38.0)</td><td align="left" valign="top">394 (74.0)</td><td align="left" valign="top">138 (38.0)</td><td align="left" valign="top">103 (76.0)</td><td align="left" valign="top">50 (25.6)</td><td align="left" valign="top">100 (33.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">138 (26.0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">32 (23.7)</td><td align="left" valign="top">29 (14.9)</td><td align="left" valign="top">64 (21.0)</td></tr><tr><td align="left" valign="top" 
colspan="8">Liver metastasis, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Negative</td><td align="left" valign="top">1393 (98.0)</td><td align="left" valign="top">491 (92.0)</td><td align="left" valign="top">361 (99.0)</td><td align="left" valign="top">124 (92.0)</td><td align="left" valign="top">191 (98.0)</td><td align="left" valign="top">301 (99.0)</td><td align="left" valign="top" rowspan="2">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Positive</td><td align="left" valign="top">28 (2.0)</td><td align="left" valign="top">42 (7.9)</td><td align="left" valign="top">4 (1.1)</td><td align="left" valign="top">11 (8.2)</td><td align="left" valign="top">4 (2.1)</td><td align="left" valign="top">4 (1.3)</td></tr><tr><td align="left" valign="top" colspan="8">Histological type, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Adenocarcinoma</td><td align="left" valign="top">1378 (97.0)</td><td align="left" valign="top">489 (92.0)</td><td align="left" valign="top">359 (72.0)</td><td align="left" valign="top">126 (25.0)</td><td align="left" valign="top">193 (39.0)</td><td align="left" valign="top">301 (60.0)</td><td align="left" valign="top" rowspan="3">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mucinous<break/>/signet ring cell</td><td align="left" valign="top">29 (2.0)</td><td align="left" valign="top">35 (6.6)</td><td align="left" valign="top">5 (1.0)</td><td align="left" valign="top">5 (1.0)</td><td align="left" valign="top">2 (0.4)</td><td align="left" valign="top">4 (0.8)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Others</td><td align="left" valign="top">4 (0.3)</td><td align="left" valign="top">9 (1.7)</td><td align="left" valign="top">1 (0.2)</td><td align="left" valign="top">4 (0.8)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top" colspan="8">Differentiation extent, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Poorly differentiated/undifferentiated</td><td align="left" valign="top">1 (0.1)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">10 (5.1)</td><td align="left" valign="top">23 (7.5)</td><td align="left" valign="top" rowspan="3">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moderately differentiated</td><td align="left" valign="top">1421 (100.0)</td><td align="left" valign="top">532 (99.8)</td><td align="left" valign="top">365 (100.0)</td><td align="left" valign="top">135 (100.0)</td><td align="left" valign="top">152 (78.0)</td><td align="left" valign="top">236 (77.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Well-differentiated</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">33 (16.9)</td><td align="left" valign="top">46 (15.1)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>PNI: perineural invasion.</p></fn><fn id="table1fn2"><p><sup>b</sup>CEA: carcinoembryonic antigen.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Risk Factors for Lymph Node 
Metastasis</title><p>Using the nonzero coefficients from the LASSO logistic regression model as a guide, we applied both the LASSO method and the multivariate logistic regression model to identify the risk factors associated with LNM in patients with RC, as depicted in <xref ref-type="fig" rid="figure2">Figure 2</xref>. The penalty parameters &#x03BB; for the internal and external validation sets were found to be 0.0047 and 0.0036, respectively. Univariate and multivariate analyses yielded the forest plots for both the internal and external validation sets, as shown in <xref ref-type="fig" rid="figure3">Figure 3</xref>. Key predictors from univariate and multivariate analyses are detailed in <xref ref-type="table" rid="table2">Table 2</xref>. This analysis assists in pinpointing factors that potentially contribute to LNM, marking a pivotal step in comprehending disease progression and in formulating evidence-based treatment strategies. The multivariate logistic regression analysis of the external validation set identified the number of peritumoral and total lymph nodes examined (<italic>P</italic>=.022), tumor length (<italic>P</italic>&#x003C;.001), perineural invasion (<italic>P</italic>&#x003C;.001), clinical T stage (<italic>P</italic>&#x003C;.001), N stage (<italic>P</italic>&#x003C;.01), and tumor differentiation grade (<italic>P</italic>&#x003C;.01) as independent risk factors for LNM.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Feature selection using LASSO logistic regression. (A) Adjustment parameters in the LASSO logistic regression for both the training and internal validation sets (C) are selected using 10-fold cross-validation with minimum criteria. The relationship between binomial deviance and the logarithm of the penalty parameter (&#x03BB;) is depicted. The optimal &#x03BB; is indicated by a black vertical line, determined by the minimum criterion and the minimum standard error of &#x03BB;. 
(B) LASSO coefficients for 12 clinical factors in the training set and internal validation set (D) are presented, illustrating the coefficient profiles against the logarithm of &#x03BB;. LASSO: Least Absolute Shrinkage and Selection Operator; MSE: mean squared error.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73765_fig02.png"/></fig><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Univariate and multivariate logistic regression analyses yielded a forest plot of odds ratios. (A) In the external validation cohort, single-factor analysis yielded a forest plot of odds ratios. (B) In the external validation cohort, multiple-factor analysis yielded a forest plot of odds ratios. (C) In the internal validation cohort, single-factor analysis yielded a forest plot of odds ratios. (D) In the internal validation cohort, multiple-factor analysis yielded a forest plot of odds ratios. CEA: carcinoembryonic antigen; PNI: perineural invasion.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73765_fig03.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Univariate and multivariate logistic regression analysis.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Characteristics</td><td align="left" valign="bottom" colspan="3">Univariate logistic</td><td align="left" valign="bottom" colspan="3">Multivariate logistic</td></tr><tr><td align="left" valign="bottom" colspan="2">OR<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> (CI)</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="bottom" colspan="2">OR (CI)</td><td align="left" valign="top"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="7">Internal validation 
cohort</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sex</td><td align="left" valign="top" colspan="2">0.382 (0.347&#x2010;0.420)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">0.999 (0.803&#x2010;1.244)</td><td align="left" valign="top">.99</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age</td><td align="left" valign="top" colspan="2">0.888 (0.879&#x2010;0.896)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.046 (1.002&#x2010;1.092)</td><td align="left" valign="top">.09</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total number of lymph nodes</td><td align="left" valign="top" colspan="2">0.962 (0.958&#x2010;0.966)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.017 (1.006&#x2010;1.028)</td><td align="left" valign="top">.01</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Tumor length</td><td align="left" valign="top" colspan="2">0.847 (0.834&#x2010;0.860)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">0.828 (0.786&#x2010;0.873)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PNI<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top" colspan="2">1.347 (1.106&#x2010;1.640)</td><td align="left" valign="top">.01</td><td align="left" valign="top" colspan="2">3.372 (2.472&#x2010;4.600)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CEA<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top" colspan="2">0.465 (0.412&#x2010;0.524)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.245 (0.992&#x2010;1.564)</td><td align="left" valign="top">.11</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clinical T stage</td><td align="left" valign="top" colspan="2">0.741 (0.722&#x2010;0.761)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">0.711 (0.602&#x2010;0.839)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>N stage</td><td align="left" valign="top" colspan="2">1.193 (1.103&#x2010;1.290)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1569.010 (300.065&#x2010;8209.112)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Liver metastasis</td><td align="left" valign="top" colspan="2">1.645 (1.131&#x2010;2.392)</td><td align="left" valign="top">.03</td><td align="left" valign="top" colspan="2">1.533 (0.931&#x2010;2.522)</td><td align="left" valign="top">.16</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Differentiation extent</td><td align="left" valign="top" colspan="2">0.375 (0.347&#x2010;0.405)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">0.001 (0.000&#x2010;0.008)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="7">External validation cohort</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sex</td><td align="left" valign="top" colspan="2">0.383 (0.344&#x2010;0.425)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.042 (0.821&#x2010;1.323)</td><td align="left" valign="top">.78</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age</td><td align="left" valign="top" colspan="2">0.887 (0.878&#x2010;0.897)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.055 (1.007&#x2010;1.105)</td><td align="left" valign="top">.06</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total number of lymph nodes</td><td align="left" valign="top" colspan="2">0.962 (0.958&#x2010;0.967)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.016 (1.005&#x2010;1.028)</td><td align="left" valign="top">.02</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Tumor length</td><td align="left" valign="top" colspan="2">0.848 (0.834&#x2010;0.862)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">0.843 (0.797&#x2010;0.891)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PNI</td><td align="left" valign="top" colspan="2">1.337 (1.077&#x2010;1.660)</td><td align="left" valign="top">.03</td><td align="left" valign="top" colspan="2">3.142 (2.246&#x2010;4.397)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CEA</td><td align="left" 
valign="top" colspan="2">0.465 (0.408&#x2010;0.531)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.295 (1.012&#x2010;1.655)</td><td align="left" valign="top">.09</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clinical T stage</td><td align="left" valign="top" colspan="2">0.740 (0.718&#x2010;0.762)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">0.681 (0.568&#x2010;0.815)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>N stage</td><td align="left" valign="top" colspan="2">1.185 (1.088&#x2010;1.292)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1307.798 (249.635&#x2010;6849.973)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Liver metastasis</td><td align="left" valign="top" colspan="2">1.400 (0.945&#x2010;2.075)</td><td align="left" valign="top">.16</td><td align="left" valign="top" colspan="2">Ref (Ref)</td><td align="left" valign="top">Ref</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Differentiation extent</td><td align="left" valign="top" colspan="2">0.374 (0.344&#x2010;0.407)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">0.002 (0.000&#x2010;0.009)</td><td align="left" valign="top">&#x003C;.001</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>OR: odds ratio. </p></fn><fn id="table2fn2"><p><sup>b</sup>PNI: perineural invasion.</p></fn><fn id="table2fn3"><p><sup>c</sup>CEA: carcinoembryonic antigen. 
</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Optimal Predictive Model Selection</title><sec id="s3-3-1"><title>Feature Weight Ranking</title><p>Based on the penalty coefficient obtained from cross-validation, we selected features with coefficients greater than 0 and represented them using feature weights. The features in the training set and internal validation set are ranked as follows: Clinical N staging, receipt of chemotherapy or radiotherapy, neural invasion, tumor histology, tumor length, number of peritumoral and total lymph nodes examined, liver metastasis, age, CEA, clinical T stage, and tumor length (<xref ref-type="fig" rid="figure4">Figure 4A</xref>). In contrast, the features in the training set and external validation set are ranked as follows: Clinical N staging, receipt of chemotherapy or radiotherapy, neural invasion, tumor histology, tumor length, liver metastasis, gender, differentiation degree, age, CEA, tumor length, and clinical T stage (<xref ref-type="fig" rid="figure4">Figure 4B</xref>).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>The weight of clinical features in the lymph node prediction model of rectal cancer. (A) Internal validation cohort. (B) External validation cohort. CEA: carcinoembryonic antigen; PNI: perineural invasion.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73765_fig04.png"/></fig></sec><sec id="s3-3-2"><title>Model Performance Evaluation</title><p>In the internal validation cohort (n=500), the 11 ML models demonstrated a wide range of discriminative performance (AUC range 0.859&#x2010;0.964). RF (AUC 0.964, 95% CI 0.950&#x2010;0.978) and ET (AUC 0.964, 95% CI 0.949&#x2010;0.978) achieved the highest AUC values, with sensitivities of 0.881 and 0.983 and specificities of 0.900 and 0.828, respectively (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). 
NB showed the lowest AUC (0.859, 95% CI 0.830&#x2010;0.887), combining perfect sensitivity (1.000) with a specificity of only 0.566. The XGB model yielded an AUC of 0.942 (95% CI 0.925&#x2010;0.959), sensitivity of 0.795, and specificity of 0.913. In the external cohort (n=500), Gradient Boosting exhibited the highest AUC (0.838, 95% CI 0.801&#x2010;0.875) but the lowest sensitivity (0.685); XGB (AUC 0.832, 95% CI 0.794&#x2010;0.869) and LGBM (AUC 0.831, 95% CI 0.793&#x2010;0.869) ranked second and third, with XGB demonstrating the smallest sensitivity-specificity gap (<italic>&#x0394;</italic>=0.113) and an <italic>F</italic><sub>1</sub>-score of 0.690 (<xref ref-type="table" rid="table3">Table 3</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Comprehensive performance metrics of machine learning models for lymph node metastasis prediction in patients with rectal cancer across validation cohorts.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model name</td><td align="left" valign="bottom" colspan="2">AUC<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> (95% CI)</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">Sensitivity</td><td align="left" valign="bottom">Specificity</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub><sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="7">Internal validation cohort</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LR<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top" colspan="2">0.924 (0.9043&#x2010;0.9435)</td><td align="left" valign="top">0.849</td><td align="left" valign="top">0.824</td><td align="left" valign="top">0.858</td><td align="left" valign="top">0.747</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Naive Bayes</td><td align="left" valign="top" colspan="2">0.859 (0.8300&#x2010;0.8873)</td><td align="left" valign="top">0.684</td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.566</td><td align="left" valign="top">0.632</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SVM<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top" colspan="2">0.924 (0.9038&#x2010;0.9446)</td><td align="left" valign="top">0.801</td><td align="left" valign="top">0.920</td><td align="left" valign="top">0.756</td><td align="left" valign="top">0.715</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>KNN<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="top" colspan="2">0.920 (0.8949&#x2010;0.9442)</td><td align="left" valign="top">0.869</td><td align="left" valign="top">0.864</td><td align="left" valign="top">0.871</td><td align="left" valign="top">0.781</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random Forest</td><td align="left" valign="top" colspan="2">0.964 (0.9502&#x2010;0.9784)</td><td align="left" valign="top">0.895</td><td align="left" valign="top">0.881</td><td align="left" valign="top">0.900</td><td align="left" valign="top">0.820</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Extremely Randomized Trees</td><td align="left" valign="top" colspan="2">0.964 (0.9491&#x2010;0.9782)</td><td align="left" valign="top">0.870</td><td align="left" valign="top">0.983</td><td align="left" valign="top">0.828</td><td align="left" valign="top">0.805</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top" colspan="2">0.942 (0.9251&#x2010;0.9585)</td><td align="left" valign="top">0.881</td><td align="left" valign="top">0.795</td><td align="left" valign="top">0.913</td><td align="left" valign="top">0.784</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LGBM<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="left" valign="top" colspan="2">0.943 (0.9266&#x2010;0.9602)</td><td align="left" valign="top">0.872</td><td align="left" valign="top">0.835</td><td align="left" valign="top">0.886</td><td align="left" valign="top">0.780</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gradient Boosting</td><td align="left" valign="top" colspan="2">0.909 (0.8889&#x2010;0.9298)</td><td align="left" valign="top">0.722</td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.619</td><td align="left" valign="top">0.662</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AdaBoost</td><td align="left" valign="top" colspan="2">0.913 (0.8924&#x2010;0.9330)</td><td align="left" valign="top">0.756</td><td align="left" valign="top">0.989</td><td align="left" valign="top">0.669</td><td align="left" valign="top">0.688</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="left" valign="top" colspan="2">0.935 (0.9168&#x2010;0.9525)</td><td align="left" valign="top">0.813</td><td align="left" valign="top">0.932</td><td align="left" valign="top">0.769</td><td align="left" valign="top">0.731</td></tr><tr><td align="left" 
valign="top" colspan="7">External validation cohort</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LR<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top" colspan="2">0.821 (0.7817&#x2010;0.8611)</td><td align="left" valign="top">0.760</td><td align="left" valign="top">0.702</td><td align="left" valign="top">0.789</td><td align="left" valign="top">0.667</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Naive Bayes</td><td align="left" valign="top" colspan="2">0.749 (0.7027&#x2010;0.7949)</td><td align="left" valign="top">0.662</td><td align="left" valign="top">0.845</td><td align="left" valign="top">0.567</td><td align="left" valign="top">0.631</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SVM<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top" colspan="2">0.814 (0.7734&#x2010;0.8539)</td><td align="left" valign="top">0.764</td><td align="left" valign="top">0.696</td><td align="left" valign="top">0.799</td><td align="left" valign="top">0.669</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>KNN<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="top" colspan="2">0.781 (0.7383&#x2010;0.8230)</td><td align="left" valign="top">0.697</td><td align="left" valign="top">0.756</td><td align="left" valign="top">0.666</td><td align="left" valign="top">0.630</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random Forest</td><td align="left" valign="top" colspan="2">0.759 (0.7137&#x2010;0.8047)</td><td align="left" valign="top">0.725</td><td align="left" 
valign="top">0.655</td><td align="left" valign="top">0.762</td><td align="left" valign="top">0.620</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Extremely Randomized Trees</td><td align="left" valign="top" colspan="2">0.735 (0.6894&#x2010;0.7813)</td><td align="left" valign="top">0.672</td><td align="left" valign="top">0.762</td><td align="left" valign="top">0.625</td><td align="left" valign="top">0.614</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top" colspan="2">0.832 (0.7943&#x2010;0.8695)</td><td align="left" valign="top">0.752</td><td align="left" valign="top">0.708</td><td align="left" valign="top">0.821</td><td align="left" valign="top">0.690</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LGBM<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="left" valign="top" colspan="2">0.831 (0.7928&#x2010;0.8693)</td><td align="left" valign="top">0.776</td><td align="left" valign="top">0.732</td><td align="left" valign="top">0.799</td><td align="left" valign="top">0.691</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gradient Boosting</td><td align="left" valign="top" colspan="2">0.838 (0.8012&#x2010;0.8750)</td><td align="left" valign="top">0.796</td><td align="left" valign="top">0.685</td><td align="left" valign="top">0.854</td><td align="left" valign="top">0.697</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AdaBoost</td><td align="left" valign="top" colspan="2">0.815 (0.7761&#x2010;0.8536)</td><td align="left" valign="top">0.768</td><td align="left" valign="top">0.589</td><td align="left" 
valign="top">0.861</td><td align="left" valign="top">0.635</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="left" valign="top" colspan="2">0.830 (0.7921&#x2010;0.8671)</td><td align="left" valign="top">0.772</td><td align="left" valign="top">0.649</td><td align="left" valign="top">0.836</td><td align="left" valign="top">0.661</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AUC: Area Under the receiver operating characteristic curve.</p></fn><fn id="table3fn2"><p><sup>b</sup>F1: Harmonic mean of precision and recall.</p></fn><fn id="table3fn3"><p><sup>c</sup>LR: Logistic Regression.</p></fn><fn id="table3fn4"><p><sup>d</sup>SVM: Support Vector Machine.</p></fn><fn id="table3fn5"><p><sup>e</sup>KNN: K-Nearest Neighbors.</p></fn><fn id="table3fn6"><p><sup>f</sup>LGBM: Light Gradient Boosting Machine.</p></fn><fn id="table3fn7"><p><sup>g</sup>MLP: Multilayer Perceptron.</p></fn></table-wrap-foot></table-wrap><p>The relative importance of model variables varies between the internal and external validation sets, depending on the characteristics. In the internal validation set, the clinical N-stage is the most important variable for the RF, ET, and XGB models. However, for the LGBM model, age and tumor length are the most important variables. In the external validation set, tumor length, clinical N-stage, and neural invasion are the three most important features for both the GB and LGBM models (<xref ref-type="fig" rid="figure5">Figure 5</xref>). We assessed feature correlations using a heatmap (<xref ref-type="fig" rid="figure6">Figure 6</xref>). 
In both validation sets, no significant correlations were observed, indicating the absence of collinearity and the independence of variables.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Ranking of the relative importance of each input variable. (A) Internal validation cohort and (B) external validation cohort. CEA: carcinoembryonic antigen; LGBM: Light Gradient Boosting Machine; PNI: perineural invasion; XGB: XGBoost.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73765_fig05.png"/></fig><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Correlation of clinical features. (A) Internal validation cohort and (B) external validation cohort.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73765_fig06.png"/></fig></sec></sec><sec id="s3-4"><title>Calibration Curve Analysis</title><p>Calibration curves (<xref ref-type="fig" rid="figure7">Figure 7</xref>) demonstrated the agreement between predicted probabilities and observed event frequencies. In the internal cohort (<xref ref-type="fig" rid="figure7">Figure 7A</xref>), the XGB curve adhered closest to the ideal diagonal (45&#x00B0; reference line) throughout, with minimal deviation from actual frequencies in the 0.3&#x2010;0.7 probability range; ET exhibited consistent positioning above the diagonal at &#x003E;0.8 probabilities (systematic overestimation), while RF deviated slightly below the diagonal between 0.6 and 0.8 (mild underestimation). 
In the external cohort (<xref ref-type="fig" rid="figure7">Figure 7B</xref>), XGB maintained the smallest overall deviation (closest to the diagonal); GB showed systematic distribution below the diagonal at 0.4&#x2010;0.6 probabilities (underestimation), and LGBM was positioned above the diagonal at &#x003C;0.3 thresholds (overestimation).</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Calibration curve of lymph node metastasis prediction in the internal validation cohort (A) and external validation cohort (B) of patients. The dashed diagonal line represents perfect calibration where predicted probability equals actual probability. KNN: K-Nearest Neighbors; LR: Logistic Regression; MLP: Multilayer Perceptron; SVM: Support Vector Machine.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73765_fig07.png"/></fig></sec><sec id="s3-5"><title>Decision Curve Analysis</title><p>We evaluated the net benefits of the models in both validation sets using DCA (<xref ref-type="fig" rid="figure8">Figure 8</xref>). In the internal validation set, DCA indicates that the XGB model has a net benefit similar to that of the XGB and LGBM models at lower threshold probabilities (approximately 0&#x2010;0.3). Furthermore, the XGB model maintains net benefit across a broader range of threshold probabilities (approximately 0&#x2010;0.9). Between 0.7 and 0.9, where the net benefits of other models decrease, the net benefit of the XGB model is still sustained. The RF model shows net benefit within the range of 0&#x2010;0.8. The ET model has a net benefit from 0.1 to 0.75. The LGBM model demonstrates net benefit in the range of 0.1&#x2010;0.7. In the external validation set, DCA reveals that the XGB model&#x2019;s net benefit is higher than that of the GB and LGBM models when the threshold probability is between 0.3 and 0.4. 
Furthermore, the XGB model maintains net benefit across a broader range of threshold probabilities (approximately 0&#x2010;0.4). The GB model shows net benefit within the range of 0.1&#x2010;0.3, while the LGBM model has net benefit from 0.1 to 0.35.</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Decision curve analysis of lymph node metastasis prediction in the internal validation cohort (A) and external validation cohort (B) of patients with RC. DCA: decision curve analysis.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73765_fig08.png"/></fig><sec id="s3-5-1"><title>Final Model Selection</title><p>XGB was selected as the definitive predictive model through comprehensive evaluation of cross-cohort discriminative stability, calibration fidelity, and clinical utility. The model achieved AUC values of 0.942 (95% CI 0.925&#x2010;0.959) in the internal cohort and 0.832 (95% CI 0.794&#x2010;0.869) in the external cohort, with a change in AUC of &#x2212;0.110 between cohorts. This reduction was smaller than those observed for RF (&#x2212;0.205) and ET (&#x2212;0.229), although larger than that observed for GB (&#x2212;0.071). Calibration curves demonstrated closest alignment to the ideal diagonal in the internal cohort (minimal deviation at 0.3&#x2010;0.7 probabilities), whereas ET systematically overestimated risk at probabilities &#x003E;0.8 and RF underestimated at 0.6&#x2010;0.8. In the external cohort, XGB maintained minimal deviation, GB underestimated at 0.4&#x2010;0.6 probabilities, and LGBM overestimated at thresholds &#x003C;0.3. DCA revealed that XGB sustained the broadest net benefit range (0&#x2010;0.9) in the internal validation set, outperforming comparison models at 0.7&#x2010;0.9 thresholds. 
In the external set, it maintained net benefit across 0&#x2010;0.4 thresholds and yielded higher net benefit than GB and LGBM at 0.3&#x2010;0.4 probability thresholds.</p></sec><sec id="s3-5-2"><title>Confusion Matrix Analysis</title><p><xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> displays the confusion matrices of the XGB model in the internal (A) and external (B) validation cohorts. In the internal cohort (n=500), the model correctly identified 333 LNM-negative cases (true negative) and 107 metastasis-positive cases (true positive), with 32 false positive and 28 false negative (FN) predictions. In the external cohort (n=500), it detected 160 true negative and 216 true positive cases, while generating 35 false positive and 89 FN classifications. The positive predictive value was 77% (107/139) in the internal cohort and 86.1% (216/251) in the external cohort; the negative predictive values were 92.2% (333/361) and 64.3% (160/249), respectively.</p></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The main findings demonstrate that the XGB model achieved optimal performance in predicting RC LNM, with tumor differentiation grade, clinical T stage, tumor length, neural invasion, N stage, and total lymph node count identified as independent predictors. RC LNM significantly impacts patient prognosis, necessitating accurate prediction of LNM for the development of effective treatment plans. While traditional diagnostic methods offer some insights, their accuracy is constrained. This study employed ML algorithms to develop 11 prediction models for RC LNM. We extracted clinical data from the SEER database for the training and internal validation sets. Additionally, we acquired relevant clinical data from CT scans at the author&#x2019;s hospital, utilizing precise pixel-level annotation and measurement for the external validation set. 
We filtered clinical data using the LASSO, univariate, and multivariate logistic regression analyses to develop and validate a predictive model for LNM. The selection of the optimal model was determined through comparative analysis of internal and external validation sets, leveraging the AUC, calibration curves, and DCA for precise assessment.</p><p>In this study, we successfully developed and validated 11 ML models for predicting LNM in RC. This study revealed three key findings. First, the six independent predictors of LNM include tumor differentiation, clinical T stage, N stage, tumor length, total number of lymph nodes, and neural invasion. Second, among the 11 constructed ML models for predicting LNM, 10 models had an AUC greater than 0.9 in the internal validation set and 7 models had an AUC greater than 0.80 in the external validation set. Lastly, following a comparison of the models&#x2019; performance and their validation set discrepancies, the XGB model emerged as the most suitable among the 11 models.</p></sec><sec id="s4-2"><title>Comparison With Prior Studies</title><p>Utilizing LASSO logistic regression and multivariate analysis, we identified tumor differentiation, clinical T stage, N stage, tumor length, neural invasion, and total number of lymph nodes as the key clinical predictive factors. Low tumor differentiation, advanced clinical T stage, advanced N stage, increased tumor length, and neural invasion are significantly associated with LNM. Identifying these factors is essential to comprehend the tumor&#x2019;s biological behavior and invasive potential. 
Variations in feature importance across models (<xref ref-type="fig" rid="figure5">Figure 5</xref>) arise from the following factors: First, clinical data encompass both continuous (eg, tumor length) and categorical variables (eg, pathological grade), where XGB&#x2019;s exact splitting favors continuous features while LGBM&#x2019;s histogram-based algorithm optimizes high-dimensional sparse feature processing. Second, regularization differences lead to distinct weight allocation&#x2014;XGB&#x2019;s combined L1/L2 regularization strictly controls overfitting, whereas LGBM&#x2019;s leaf-wise growth prioritizes locally significant features. Third, although LASSO-based feature selection reduced dimensionality, residual feature correlations are differentially processed. These inherent algorithmic variations confirm that no single model can fully capture feature relationships, necessitating multimodel comparison for complementary insights. Poorly differentiated tumor cells show greater invasiveness and a higher likelihood of LNM. A study by Saraste et al [<xref ref-type="bibr" rid="ref24">24</xref>] involving 1664 patients with RC from the Swedish Rectal Cancer Registry between 2007 and 2010 concluded through multivariate analysis that poorly differentiated tumors are a significant risk factor for LNM, aligning with the findings of this study. Advanced clinical T stage is a direct indicator of deeper tumor invasion. A study by von den Gr&#x00FC;n et al [<xref ref-type="bibr" rid="ref25">25</xref>], which involved a binary logistic regression multivariate analysis of 776 patients with RC, revealed that advanced T stage is a significant prognostic factor for LNM in RC. 
Previous studies [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref27">27</xref>] have established that clinical T stage and N stage significantly influence the prognosis and treatment of RC, aligning with the results of the univariate and multivariate analyses presented in <xref ref-type="table" rid="table2">Table 2</xref>. Increased tumor length may afford tumor cells increased opportunities to interact with adjacent lymph nodes. In a study [<xref ref-type="bibr" rid="ref28">28</xref>] that employed 7 clinical parameters as independent prognostic factors to develop a nomogram prediction model, 6484 patients with RC from the SEER database were analyzed using Cox proportional hazards regression. This analysis identified independent prognostic factors such as T stage and tumor length. However, unlike this study, the aforementioned research did not include an external validation cohort. The presence of neural invasion indicates a high level of tumor invasiveness and the potential for dissemination along neural pathways. Neural invasion is a significant factor in both univariate and multivariate analyses, as well as in evaluating the importance of model features. Studies by Ueno et al [<xref ref-type="bibr" rid="ref29">29</xref>] and Song et al [<xref ref-type="bibr" rid="ref30">30</xref>] have shown that neural invasion is a significant factor in LNM in RC. The total number of perirectal examined lymph nodes, as a key factor, plays a crucial role in accurate staging and prognostic assessment of RC. Guan et al [<xref ref-type="bibr" rid="ref31">31</xref>] found that a higher number of perirectal examined lymph nodes in RC is linked to more accurate lymph node staging and improved survival. 
Analyzing data from stage I to III resected RC in multi-institutional Chinese and US SEER databases revealed that an increasing number of perirectal examined lymph nodes significantly raises the proportion of cases shifting from lymph node-negative to lymph node-positive. Furthermore, after adjusting for confounding factors, overall survival consistently improves.</p><p>Utilizing the selected clinical characteristics, we developed a predictive model for LNM in RC. We extracted a cohort of 2454 patients with RC from the SEER database and randomly selected 500 cases for the internal validation cohort. The remaining 1954 cases served as the training cohort, while an additional 500 cases from the author&#x2019;s hospital constituted the external validation cohort. In the internal validation set, the RF and ET models achieved an AUC of 0.964. The LGBM and XGB models also performed remarkably well, with AUC values of 0.943 and 0.942, respectively. Among all models, the NB model had the lowest AUC of 0.859, while the remaining 10 models all exceeded 0.9. In the external validation set, the GB model demonstrated the best performance with an AUC of 0.838, followed by the XGB and LGBM models, which reached AUC values of 0.832 and 0.831, respectively. In the study by Guan et al [<xref ref-type="bibr" rid="ref18">18</xref>], 6578 patients with RC were enrolled across several institutions, including the Cancer Hospital of the Chinese Academy of Medical Sciences, Peking Union Medical College, Changhai Hospital of Naval Medical University, and the Second Affiliated Hospital of Harbin Medical University. The XGB model was identified as the optimal in their study, achieving AUCs of 0.78 and 0.71 across two validation cohorts. In contrast, our study&#x2019;s XGB model showed superior performance, achieving an AUC of 0.942 in the internal validation cohort and 0.832 in the external validation cohort. 
In the external validation cohort, LNM status, clinical T and N stages, and tumor length were ascertained through the annotation of CT imaging combined with clinical data. Nevertheless, most models in this study demonstrated relatively good performance across both the internal and external validation cohorts, with the AUCs of the optimal models all exceeding 0.80, indicating a certain level of accuracy and reliability. These results demonstrate that our models exhibit high stability and generalizability across diverse datasets. Regardless of whether the patients with RC are in the United States or China, our models show accurate predictive ability for forecasting LNM in RC. In addition, the LNM status and associated details in the 500 case records from the hospital were meticulously annotated by 50 physicians. These annotations were made by integrating clinical and pathological data using imaging software on enhanced CT scans. Following the initial annotation, two radiologists, possessing 8 and 20 years of experience, respectively, conducted a review and confirmation of the annotations. The multi-physician annotation strategy, which incorporates clinical pathology data and imaging software, boosts the reliability of the data and, in turn, elevates the predictive accuracy of the model. A similar approach was taken in Liu et al&#x2019;s study [<xref ref-type="bibr" rid="ref32">32</xref>], wherein integrating clinical data with ML for magnetic resonance image analysis elevated the AUC value of single-region radiomics from 0.702 to 0.827. In a study by Wan et al [<xref ref-type="bibr" rid="ref33">33</xref>], an automated segmentation method using deep learning demonstrated potential for predicting LNM in RC. However, our manual annotation method ensures precise localization of small lymph nodes (diameter &#x003C;5 mm) and ambiguous lesions, which are crucial for RC staging but challenging for automated tools. 
This strategy is particularly valuable in resource-limited environments lacking artificial intelligence infrastructure. Although magnetic resonance imaging is widely regarded as the gold standard for local T-staging and lymph node status assessment in RC, in this study, we chose to analyze CT images due to the diversity of data sources and the prevalence of CT images. CT scanning is less costly and quicker. Particularly, in resource-limited medical settings, CT remains a routine examination tool that can effectively provide important information on lymph node status and tumor staging. This highlights the benefits of integrating both clinical data and imaging information into ML models. While the annotation of CT images and their incorporation are standard practices in traditional radiomics and the development of deep learning models, they are less common in models derived solely from clinical data. This study shows that employing imaging software to annotate CT scans and extract data for clinical predictive models not only yields high-quality data for model development but also presents an innovative approach to data acquisition and processing for future studies.</p></sec><sec id="s4-3"><title>Strengths and Limitations</title><p>A key innovation of this study is the comprehensive utilization of diverse advanced ML algorithms, coupled with the validation of the model&#x2019;s performance across data from various sources. Our models have shown strong generalization capabilities, as evidenced by their performance in both internal and external validation cohorts. Additionally, using LASSO logistic regression for feature selection enabled us to pinpoint key clinical predictive factors. This approach not only bolstered the model&#x2019;s predictive accuracy but also improved its interpretability. We presented the importance of features in a ranked order and investigated the correlations among variables with a correlation heatmap. 
Finally, the optimal model was ascertained through a comprehensive, evidence-based methodology integrating multimetric performance evaluation (AUC, 95% CI, accuracy, sensitivity, specificity, and <italic>F</italic><sub>1</sub>-score), augmented by high-resolution assessment of calibration curves and DCA.</p><p>Despite strong performance in both the internal and external validation sets, our model in this study has some limitations. First, the dataset is constrained by its population representativeness. The SEER database comprises exclusively American cases, and the dataset from the author&#x2019;s hospital consists solely of Chinese cases. This could impact the model&#x2019;s ability to generalize across diverse populations. In Shulman et al&#x2019;s study [<xref ref-type="bibr" rid="ref34">34</xref>], which analyzed 34,500 patients with RC, it was found that patients of different races exhibited varying lymph node statuses. Second, the model did not include variables such as BMI, alpha-fetoprotein, cancer antigen 125, and carbohydrate antigen 19-9 [<xref ref-type="bibr" rid="ref18">18</xref>], which were previously explored as clinical variables. This omission could impact the model&#x2019;s predictive accuracy and comprehensiveness. Future studies could further refine predictive accuracy by incorporating clinical indicators and serum biomarkers (eg, BMI and carbohydrate antigen 19-9) and emerging indicators such as circulating tumor DNA, combined with radiomic features. Third, FN classifications warrant clinical attention. The FN rate was 20.7% (28/135) in the internal cohort and 29.2% (89/305) in the external cohort. Such misclassifications may lead to undertreatment (eg, omission of adjuvant chemotherapy or lymph node dissection), increasing risks of recurrence and metastasis. Crucially, while our model was developed to surpass conventional CT assessments, reducing FNs remains a priority for future optimization through biomarker integration or advanced imaging techniques. 
The study by Yu et al [<xref ref-type="bibr" rid="ref35">35</xref>] demonstrated that incorporating multicenter data and multimodal features significantly reduced FN rates, providing a feasible direction for refining our model. In this study, we utilized 11 ML methods. Future research could further enhance the model&#x2019;s predictive performance by integrating diverse ML algorithms and leveraging their unique strengths.</p></sec><sec id="s4-4"><title>Conclusions</title><p>In conclusion, this study successfully developed an ML-based risk prediction model for LNM in RC, validating its performance using both an internal and an external validation set. Through the analysis of extensive clinicopathological data, we identified tumor differentiation, clinical T stage, N stage, tumor length, neural invasion, and total number of lymph nodes as independent predictive factors. Among the 11 models evaluated, the XGB model demonstrated the best predictive performance. These models are anticipated to aid in clinical decision-making, offering vital insights for treatment selection and prognostic assessment of patients with RC. They hold significant clinical utility and scientific importance. Concurrently, the external validation set demonstrates that clinical data derived from CT imaging annotation and measurement are comparable to traditionally obtained clinical data and can effectively serve as a source of clinical data for ML applications.</p></sec></sec></body><back><ack><p>This research was collectively supported by The University Synergy Innovation Program of Anhui Province (Grant No. GXXT-2022-056) and the Key Project of the Department of Education Quality Engineering (Grant No. 2023jyxm1111). We thank the SEER (Surveillance, Epidemiology, and End Results) database and the First Affiliated Hospital of Anhui Medical University for providing patient data. 
The authors sincerely thank all the staff of our department for their help in data collection, sample annotation, and manuscript writing. The following is the list of personnel who have contributed to pixel-level annotation: Jiarui Zhang, Xiaosu Li, Xiaoya Wang, Wenna Chai, Chuiyan Huang, Tianxu Yang, Jingwen Hu, Yiran Zhang, Ping Wang, Bingjing Zhu, Jiani Ma, Yuyan Ma.</p></ack><notes><sec><title>Data Availability</title><p>The raw data sets are available only upon reasonable request because of privacy and ethical restrictions. The data are not publicly available because of these restrictions. The code is available upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>WH and CL were primarily responsible for data organization, analysis, and model construction. ZW was in charge of literature search and data collection. WW provided guidance on data analysis. SW offered guidance on model construction and validation. BZ was responsible for literature quality assessment and manuscript review and served as the corresponding author. 
All authors have read and approved the manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AB</term><def><p>Ada-Boost</p></def></def-item><def-item><term id="abb2">AJCC</term><def><p>American Joint Committee on Cancer</p></def></def-item><def-item><term id="abb3">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb4">CEA</term><def><p>carcinoembryonic antigen</p></def></def-item><def-item><term id="abb5">CT</term><def><p>computed tomography</p></def></def-item><def-item><term id="abb6">DCA</term><def><p>decision curve analysis</p></def></def-item><def-item><term id="abb7">ET</term><def><p>Extremely Randomized Trees</p></def></def-item><def-item><term id="abb8">GB</term><def><p>Gradient Boosting</p></def></def-item><def-item><term id="abb9">KNN</term><def><p>K-Nearest Neighbors</p></def></def-item><def-item><term id="abb10">LASSO</term><def><p>Least Absolute Shrinkage and Selection Operator</p></def></def-item><def-item><term id="abb11">LGBM</term><def><p>Light Gradient Boosting Machine</p></def></def-item><def-item><term id="abb12">LNM</term><def><p>lymph node metastasis</p></def></def-item><def-item><term id="abb13">LR</term><def><p>Logistic Regression</p></def></def-item><def-item><term id="abb14">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb15">MLP</term><def><p>Multilayer Perceptron</p></def></def-item><def-item><term id="abb16">NB</term><def><p>Naive Bayes</p></def></def-item><def-item><term id="abb17">PNI</term><def><p>Perineural Invasion</p></def></def-item><def-item><term id="abb18">RC</term><def><p>rectal cancer</p></def></def-item><def-item><term id="abb19">RF</term><def><p>Random Forest</p></def></def-item><def-item><term id="abb20">SEER</term><def><p>Surveillance, Epidemiology, and End Results</p></def></def-item><def-item><term 
id="abb21">SVM</term><def><p>Support Vector Machine</p></def></def-item><def-item><term id="abb22">XGBoost</term><def><p>Extreme Gradient Boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Goding Sauer</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Colorectal cancer statistics, 2020</article-title><source>CA Cancer J Clin</source><year>2020</year><month>05</month><volume>70</volume><issue>3</issue><fpage>145</fpage><lpage>164</lpage><pub-id pub-id-type="doi">10.3322/caac.21601</pub-id><pub-id pub-id-type="medline">32133645</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bray</surname><given-names>F</given-names> </name><name name-style="western"><surname>Laversanne</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sung</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Global cancer statistics 2022: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title><source>CA Cancer J Clin</source><year>2024</year><volume>74</volume><issue>3</issue><fpage>229</fpage><lpage>263</lpage><pub-id pub-id-type="doi">10.3322/caac.21834</pub-id><pub-id pub-id-type="medline">38572751</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morgan</surname><given-names>E</given-names> </name><name 
name-style="western"><surname>Arnold</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gini</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Global burden of colorectal cancer in 2020 and 2040: incidence and mortality estimates from GLOBOCAN</article-title><source>Gut</source><year>2023</year><month>02</month><volume>72</volume><issue>2</issue><fpage>338</fpage><lpage>344</lpage><pub-id pub-id-type="doi">10.1136/gutjnl-2022-327736</pub-id><pub-id pub-id-type="medline">36604116</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liersch</surname><given-names>T</given-names> </name><name name-style="western"><surname>Langer</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ghadimi</surname><given-names>BM</given-names> </name><etal/></person-group><article-title>Lymph node status and TS gene expression are prognostic markers in stage II/III rectal cancer after neoadjuvant fluorouracil-based chemoradiotherapy</article-title><source>J Clin Oncol</source><year>2006</year><month>09</month><day>1</day><volume>24</volume><issue>25</issue><fpage>4062</fpage><lpage>4068</lpage><pub-id pub-id-type="doi">10.1200/JCO.2005.04.2739</pub-id><pub-id pub-id-type="medline">16943523</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kuru</surname><given-names>B</given-names> </name><name name-style="western"><surname>Camlibel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Din&#x00E7;</surname><given-names>S</given-names> </name><name name-style="western"><surname>Erdem</surname><given-names>E</given-names> </name><name name-style="western"><surname>Alag&#x00F6;l</surname><given-names>H</given-names> 
</name></person-group><article-title>Prognostic factors affecting local recurrence and survival for operable rectal cancers</article-title><source>J Exp Clin Cancer Res</source><year>2002</year><month>09</month><volume>21</volume><issue>3</issue><fpage>329</fpage><lpage>335</lpage><pub-id pub-id-type="medline">12385573</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leibold</surname><given-names>T</given-names> </name><name name-style="western"><surname>Shia</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ruo</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Prognostic implications of the distribution of lymph node metastases in rectal cancer after neoadjuvant chemoradiotherapy</article-title><source>J Clin Oncol</source><year>2008</year><month>05</month><day>1</day><volume>26</volume><issue>13</issue><fpage>2106</fpage><lpage>2111</lpage><pub-id pub-id-type="doi">10.1200/JCO.2007.12.7704</pub-id><pub-id pub-id-type="medline">18362367</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Isaka</surname><given-names>N</given-names> </name><name name-style="western"><surname>Nozue</surname><given-names>M</given-names> </name><name name-style="western"><surname>Doy</surname><given-names>M</given-names> </name><name name-style="western"><surname>Fukao</surname><given-names>K</given-names> </name></person-group><article-title>Prognostic significance of perirectal lymph node micrometastases in Dukes&#x2019; B rectal carcinoma: an immunohistochemical study by CAM5.2</article-title><source>Clin Cancer Res</source><year>1999</year><month>08</month><volume>5</volume><issue>8</issue><fpage>2065</fpage><lpage>2068</lpage><pub-id 
pub-id-type="medline">10473087</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Govindarajan</surname><given-names>A</given-names> </name><name name-style="western"><surname>G&#x00F6;nen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Weiser</surname><given-names>MR</given-names> </name><etal/></person-group><article-title>Challenging the feasibility and clinical significance of current guidelines on lymph node examination in rectal cancer in the era of neoadjuvant therapy</article-title><source>J Clin Oncol</source><year>2011</year><month>12</month><day>1</day><volume>29</volume><issue>34</issue><fpage>4568</fpage><lpage>4573</lpage><pub-id pub-id-type="doi">10.1200/JCO.2011.37.2235</pub-id><pub-id pub-id-type="medline">21990400</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moons</surname><given-names>LMG</given-names> </name><name name-style="western"><surname>Bastiaansen</surname><given-names>BAJ</given-names> </name><name name-style="western"><surname>Richir</surname><given-names>MC</given-names> </name><etal/></person-group><article-title>Endoscopic intermuscular dissection for deep submucosal invasive cancer in the rectum: a new endoscopic approach</article-title><source>Endoscopy</source><year>2022</year><month>10</month><volume>54</volume><issue>10</issue><fpage>993</fpage><lpage>998</lpage><pub-id pub-id-type="doi">10.1055/a-1748-8573</pub-id><pub-id pub-id-type="medline">35073588</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anitei</surname><given-names>MG</given-names> </name><name 
name-style="western"><surname>Zeitoun</surname><given-names>G</given-names> </name><name name-style="western"><surname>Mlecnik</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Prognostic and predictive values of the immunoscore in patients with rectal cancer</article-title><source>Clin Cancer Res</source><year>2014</year><month>04</month><day>1</day><volume>20</volume><issue>7</issue><fpage>1891</fpage><lpage>1899</lpage><pub-id pub-id-type="doi">10.1158/1078-0432.CCR-13-2830</pub-id><pub-id pub-id-type="medline">24691640</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abbaspour</surname><given-names>E</given-names> </name><name name-style="western"><surname>Karimzadhagh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Monsef</surname><given-names>A</given-names> </name><name name-style="western"><surname>Joukar</surname><given-names>F</given-names> </name><name name-style="western"><surname>Mansour-Ghanaei</surname><given-names>F</given-names> </name><name name-style="western"><surname>Hassanipour</surname><given-names>S</given-names> </name></person-group><article-title>Application of radiomics for preoperative prediction of lymph node metastasis in colorectal cancer: a systematic review and meta-analysis</article-title><source>Int J Surg</source><year>2024</year><month>06</month><day>1</day><volume>110</volume><issue>6</issue><fpage>3795</fpage><lpage>3813</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000001239</pub-id><pub-id pub-id-type="medline">38935817</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chu</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Fishman</surname><given-names>EK</given-names> 
</name></person-group><article-title>Pancreatic ductal adenocarcinoma staging: a narrative review of radiologic techniques and advances</article-title><source>Int J Surg</source><year>2024</year><month>10</month><day>1</day><volume>110</volume><issue>10</issue><fpage>6052</fpage><lpage>6063</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000000899</pub-id><pub-id pub-id-type="medline">38085802</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ushigome</surname><given-names>H</given-names> </name><name name-style="western"><surname>Fukunaga</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Nagasaki</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Difficulty of predicting lymph node metastasis on CT in patients with rectal neuroendocrine tumors</article-title><source>PLoS ONE</source><year>2019</year><volume>14</volume><issue>2</issue><fpage>e0211675</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0211675</pub-id><pub-id pub-id-type="medline">30742649</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dankwa-Mullan</surname><given-names>I</given-names> </name><name name-style="western"><surname>Weeraratne</surname><given-names>D</given-names> </name></person-group><article-title>Artificial intelligence and machine learning technologies in cancer care: addressing disparities, bias, and data diversity</article-title><source>Cancer Discov</source><year>2022</year><month>06</month><day>2</day><volume>12</volume><issue>6</issue><fpage>1423</fpage><lpage>1427</lpage><pub-id pub-id-type="doi">10.1158/2159-8290.CD-22-0373</pub-id><pub-id pub-id-type="medline">35652218</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>S</given-names> </name><name name-style="western"><surname>Diekmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wenzel</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Combining machine learning with real-world data to identify gaps in clinical practice guidelines: feasibility study using the prospective German stroke registry and the national acute ischemic stroke guidelines</article-title><source>JMIR Med Inform</source><year>2025</year><month>07</month><day>11</day><volume>13</volume><fpage>e69282</fpage><pub-id pub-id-type="doi">10.2196/69282</pub-id><pub-id pub-id-type="medline">40653745</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nemlander</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ewing</surname><given-names>M</given-names> </name><name name-style="western"><surname>Abedi</surname><given-names>E</given-names> </name><etal/></person-group><article-title>A machine learning tool for identifying non-metastatic colorectal cancer in primary care</article-title><source>Eur J Cancer</source><year>2023</year><month>03</month><volume>182</volume><fpage>100</fpage><lpage>106</lpage><pub-id pub-id-type="doi">10.1016/j.ejca.2023.01.011</pub-id><pub-id pub-id-type="medline">36758474</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hamerla</surname><given-names>G</given-names> </name><name name-style="western"><surname>Meyer</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Hambsch</surname><given-names>P</given-names> 
</name><etal/></person-group><article-title>Radiomics model based on non-contrast CT shows no predictive power for complete pathological response in locally advanced rectal cancer</article-title><source>Cancers (Basel)</source><year>2019</year><month>10</month><day>29</day><volume>11</volume><issue>11</issue><fpage>1680</fpage><pub-id pub-id-type="doi">10.3390/cancers11111680</pub-id><pub-id pub-id-type="medline">31671766</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guan</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>G</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>W</given-names> </name><etal/></person-group><article-title>An easy-to-use artificial intelligence preoperative lymph node metastasis predictor (LN-MASTER) in rectal cancer based on a privacy-preserving computing platform: multicenter retrospective cohort study</article-title><source>Int J Surg</source><year>2023</year><month>03</month><day>1</day><volume>109</volume><issue>3</issue><fpage>255</fpage><lpage>265</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000000067</pub-id><pub-id pub-id-type="medline">36927812</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ding</surname><given-names>L</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>GW</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>BC</given-names> </name><etal/></person-group><article-title>Artificial intelligence system of faster region-based convolutional neural network surpassing senior radiologists in evaluation of metastatic lymph nodes of rectal cancer</article-title><source>Chin Med J 
(Engl)</source><year>2019</year><month>02</month><volume>132</volume><issue>4</issue><fpage>379</fpage><lpage>387</lpage><pub-id pub-id-type="doi">10.1097/CM9.0000000000000095</pub-id><pub-id pub-id-type="medline">30707177</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weiser</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Chou</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Keshinro</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Development and assessment of a clinical calculator for estimating the likelihood of recurrence and survival among patients with locally advanced rectal cancer treated with chemotherapy, radiotherapy, and surgery</article-title><source>JAMA Netw Open</source><year>2021</year><month>11</month><day>1</day><volume>4</volume><issue>11</issue><fpage>e2133457</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.33457</pub-id><pub-id pub-id-type="medline">34748003</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Identification of metastatic lymph nodes in MR imaging with faster region-based convolutional neural networks</article-title><source>Cancer Res</source><year>2018</year><month>09</month><day>1</day><volume>78</volume><issue>17</issue><fpage>5135</fpage><lpage>5143</lpage><pub-id pub-id-type="doi">10.1158/0008-5472.CAN-18-0494</pub-id><pub-id pub-id-type="medline">30026330</pub-id></nlm-citation></ref><ref 
id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><etal/></person-group><article-title>CT morphological features for predicting the risk of lymph node metastasis in T1 colorectal cancer</article-title><source>Eur Radiol</source><year>2023</year><month>10</month><volume>33</volume><issue>10</issue><fpage>6861</fpage><lpage>6871</lpage><pub-id pub-id-type="doi">10.1007/s00330-023-09688-9</pub-id><pub-id pub-id-type="medline">37171490</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Niu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Diagnostic performance of node reporting and data system (Node-RADS) for assessing mesorectal lymph node in rectal cancer by CT</article-title><source>BMC Cancer</source><year>2024</year><month>06</month><day>11</day><volume>24</volume><issue>1</issue><fpage>716</fpage><pub-id pub-id-type="doi">10.1186/s12885-024-12487-0</pub-id><pub-id pub-id-type="medline">38862951</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saraste</surname><given-names>D</given-names> </name><name name-style="western"><surname>Gunnarsson</surname><given-names>U</given-names> </name><name name-style="western"><surname>Janson</surname><given-names>M</given-names> 
</name></person-group><article-title>Predicting lymph node metastases in early rectal cancer</article-title><source>Eur J Cancer</source><year>2013</year><month>03</month><volume>49</volume><issue>5</issue><fpage>1104</fpage><lpage>1108</lpage><pub-id pub-id-type="doi">10.1016/j.ejca.2012.10.005</pub-id><pub-id pub-id-type="medline">23122785</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>von den Gr&#x00FC;n</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Hartmann</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fietkau</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Can clinicopathological parameters predict for lymph node metastases in ypT0-2 rectal carcinoma? Results of the CAO/ARO/AIO-94 and CAO/ARO/AIO-04 phase 3 trials</article-title><source>Radiother Oncol</source><year>2018</year><month>09</month><volume>128</volume><issue>3</issue><fpage>557</fpage><lpage>563</lpage><pub-id pub-id-type="doi">10.1016/j.radonc.2018.06.008</pub-id><pub-id pub-id-type="medline">29929861</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gunderson</surname><given-names>LL</given-names> </name><name name-style="western"><surname>Sargent</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Tepper</surname><given-names>JE</given-names> </name><etal/></person-group><article-title>Impact of T and N stage and treatment on survival and relapse in adjuvant rectal cancer: a pooled analysis</article-title><source>J Clin Oncol</source><year>2004</year><month>05</month><day>15</day><volume>22</volume><issue>10</issue><fpage>1785</fpage><lpage>1796</lpage><pub-id 
pub-id-type="doi">10.1200/JCO.2004.08.173</pub-id><pub-id pub-id-type="medline">15067027</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Duchalais</surname><given-names>E</given-names> </name><name name-style="western"><surname>Glyn Mullaney</surname><given-names>T</given-names> </name><name name-style="western"><surname>Spears</surname><given-names>GM</given-names> </name><etal/></person-group><article-title>Prognostic value of pathological node status after neoadjuvant radiotherapy for rectal cancer</article-title><source>Br J Surg</source><year>2018</year><month>10</month><volume>105</volume><issue>11</issue><fpage>1501</fpage><lpage>1509</lpage><pub-id pub-id-type="doi">10.1002/bjs.10867</pub-id><pub-id pub-id-type="medline">29663352</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name></person-group><article-title>Oncologic nomogram for stage I rectal cancer to assist patient selection for adjuvant (chemo)radiotherapy following local excision</article-title><source>Front Oncol</source><year>2021</year><volume>11</volume><issue>632085</issue><fpage>632085</fpage><pub-id pub-id-type="doi">10.3389/fonc.2021.632085</pub-id><pub-id pub-id-type="medline">33816269</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name
name-style="western"><surname>Ueno</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hase</surname><given-names>K</given-names> </name><name name-style="western"><surname>Mochizuki</surname><given-names>H</given-names> </name></person-group><article-title>Criteria for extramural perineural invasion as a prognostic factor in rectal cancer</article-title><source>Br J Surg</source><year>2001</year><month>07</month><volume>88</volume><issue>7</issue><fpage>994</fpage><lpage>1000</lpage><pub-id pub-id-type="doi">10.1046/j.0007-1323.2001.01810.x</pub-id><pub-id pub-id-type="medline">11442534</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Song</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kang</surname><given-names>KM</given-names> </name><etal/></person-group><article-title>Significance of perineural and lymphovascular invasion in locally advanced rectal cancer treated by preoperative chemoradiotherapy and radical surgery: can perineural invasion be an indication of adjuvant chemotherapy?</article-title><source>Radiother Oncol</source><year>2019</year><month>04</month><volume>133</volume><fpage>125</fpage><lpage>131</lpage><pub-id pub-id-type="doi">10.1016/j.radonc.2019.01.002</pub-id><pub-id pub-id-type="medline">30935568</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guan</surname><given-names>X</given-names> </name><name name-style="western"><surname>Jiao</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Optimal examined lymph node number for 
accurate staging and long-term survival in rectal cancer: a population-based study</article-title><source>Int J Surg</source><year>2023</year><month>08</month><day>1</day><volume>109</volume><issue>8</issue><fpage>2241</fpage><lpage>2248</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000000320</pub-id><pub-id pub-id-type="medline">37428195</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Multiregional-based magnetic resonance imaging radiomics combined with clinical data improves efficacy in predicting lymph node metastasis of rectal cancer</article-title><source>Front Oncol</source><year>2020</year><volume>10</volume><issue>585767</issue><fpage>585767</fpage><pub-id pub-id-type="doi">10.3389/fonc.2020.585767</pub-id><pub-id pub-id-type="medline">33680919</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wan</surname><given-names>L</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Prediction of lymph node metastasis in stage T1-2 rectal cancers with MRI-based deep learning</article-title><source>Eur Radiol</source><year>2023</year><month>05</month><volume>33</volume><issue>5</issue><fpage>3638</fpage><lpage>3646</lpage><pub-id pub-id-type="doi">10.1007/s00330-023-09450-1</pub-id><pub-id pub-id-type="medline">36905470</pub-id></nlm-citation></ref><ref 
id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shulman</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>M</given-names> </name><name name-style="western"><surname>Handorf</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Meyer</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Lynch</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Arora</surname><given-names>S</given-names> </name></person-group><article-title>Factors associated with racial and ethnic disparities in locally advanced rectal cancer outcomes</article-title><source>JAMA Netw Open</source><year>2024</year><month>02</month><day>5</day><volume>7</volume><issue>2</issue><fpage>e240044</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.0044</pub-id><pub-id pub-id-type="medline">38421650</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Shao</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Epidemiology and a predictive model of prognosis index based on machine learning in primary breast lymphoma: population-based study</article-title><source>JMIR Public Health Surveill</source><year>2023</year><month>06</month><day>8</day><volume>9</volume><fpage>e45455</fpage><pub-id pub-id-type="doi">10.2196/45455</pub-id><pub-id pub-id-type="medline">37169516</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Normalization standards of clinical data.</p><media
xlink:href="medinform_v13i1e73765_app1.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>The hyperparameter settings and tuning strategies of the 11 models.</p><media xlink:href="medinform_v13i1e73765_app2.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Range standard for property values of clinical features in models.</p><media xlink:href="medinform_v13i1e73765_app3.docx" xlink:title="DOCX File, 20 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Confusion matrices for lymph node metastasis (LNM) prediction in rectal cancer (RC) patients.</p><media xlink:href="medinform_v13i1e73765_app4.png" xlink:title="PNG File, 86 KB"/></supplementary-material></app-group></back></article>