<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e80809</article-id><article-id pub-id-type="doi">10.2196/80809</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Machine Learning&#x2013;Based Risk Prediction for Coronary Heart Disease Complicated by Hyperhomocysteinemia: Retrospective Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Du</surname><given-names>Ming-Yuan</given-names></name><degrees>MMD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Lyu</surname><given-names>Meng-Ke</given-names></name><degrees>MMD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Liu</surname><given-names>Hai-long</given-names></name><degrees>MMD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" 
rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Li</surname><given-names>Yi-zhuo</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yan</surname><given-names>Hai-feng</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Li</surname><given-names>Xiao-hui</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Heart Center, The First Affiliated Hospital of Henan University of Chinese Medicine, National Regional (TCM) Cardiovascular Diagnosis and Treatment Center</institution><addr-line>Zhengzhou</addr-line><country>China</country></aff><aff id="aff2"><institution>Collaborative Innovation Center of Prevention and Treatment of Major Diseases by Chinese and Western Medicine</institution><addr-line>Zhengzhou</addr-line><country>China</country></aff><aff id="aff3"><institution>The First Affiliated Hospital of Henan University of Traditional Chinese Medicine</institution><addr-line>No. 
19 Renmin Road, Jinshui District, Henan Province</addr-line><addr-line>Zhengzhou</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Klann</surname><given-names>Jeffrey</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Hou</surname><given-names>Ling</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Zhou</surname><given-names>Yunxiang</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Meng-Ke Lyu, MMD, The First Affiliated Hospital of Henan University of Traditional Chinese Medicine, No. 19 Renmin Road, Jinshui District, Henan Province, Zhengzhou, 450000, China, 86 15649856289; <email>skylmk@126.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>19</day><month>3</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e80809</elocation-id><history><date date-type="received"><day>17</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>17</day><month>01</month><year>2026</year></date><date date-type="accepted"><day>20</day><month>01</month><year>2026</year></date></history><copyright-statement>&#x00A9; Ming-Yuan Du, Meng-Ke Lyu, Hai-long Liu, Yi-zhuo Li, Hai-feng Yan, Xiao-hui Li. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 19.3.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e80809"/><abstract><sec><title>Background</title><p>Hyperhomocysteinemia (HHcy) is recognized as an independent risk factor for coronary heart disease (CHD), yet accurately predicting CHD risk in patients with HHcy remains a challenge. This study aimed to develop and validate multiple machine learning models for predicting CHD risk in patients with HHcy and elucidate key predictors using Shapley Additive Explanation (SHAP) algorithms.</p></sec><sec><title>Objective</title><p>This study aims to develop and validate machine learning models for predicting the risk of coronary heart disease in individuals with elevated homocysteine levels, aiming to improve early risk stratification and clinical decision-making.</p></sec><sec sec-type="methods"><title>Methods</title><p>This single-center retrospective study collected data from patients who were diagnosed with HHcy through electronic medical records, which were randomly divided into training (n=364, 70%), validation (n=78, 15%), and test (n=78, 15%) sets. 
Seven machine learning models were constructed, including logistic regression, k-nearest neighbor, decision tree, random forest, extreme gradient boost, light gradient boosting machine (LightGBM), and stacking. Six core variables (age, weight, hypertension, continuous drinking history, activated partial thromboplastin time, and carotid plaque) were utilized as inputs, with performance evaluation metrics encompassing area under the receiver operating characteristic curve, accuracy, <italic>F</italic><sub>1</sub>-score, calibration curve, Brier score, and decision curve analysis. Additionally, SHAP interpretation was conducted on the optimal LightGBM model.</p></sec><sec sec-type="results"><title>Results</title><p>The LightGBM model exhibited superior performance in the test set (area under the receiver operating characteristic curve=0.807, <italic>F</italic><sub>1</sub>-score=0.606), demonstrated good calibration (Brier score=0.2415), and yielded high clinical net benefit. SHAP analysis revealed age and activated partial thromboplastin time as the most influential predictors, followed by hypertension, weight, carotid plaque, and continuous drinking history. The correlation heat map illustrated low collinearity among variables, ensuring model stability.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The LightGBM model demonstrated high accuracy and interpretability in forecasting CHD risk among patients with HHcy. 
The integration of machine learning and interpretable artificial intelligence methods holds promise for delivering personalized early risk assessment and intervention strategies in clinical settings.</p></sec></abstract><kwd-group><kwd>retrospective study</kwd><kwd>hyperhomocysteinemia</kwd><kwd>coronary heart disease</kwd><kwd>machine learning</kwd><kwd>predictive model</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Hyperhomocysteinemia (HHcy) predisposes individuals to heightened and premature susceptibility to coronary heart disease (CHD) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Existing research has substantiated that HHcy expedites atherosclerosis progression via various mechanisms, including vascular endothelial impairment, pro-oxidative stress, and disrupted coagulation pathways, establishing it as a stand-alone CHD risk factor [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Nevertheless, the CHD risk profile in HHcy cohorts is heterogeneous, potentially modulated by a confluence of factors encompassing age, biochemical milieu, lifestyle choices, and vascular morphology.</p><p>The current assessment tools for CHD risk in patients with HHcy are limited [<xref ref-type="bibr" rid="ref6">6</xref>]. Traditional models, such as Cox or logistic regression, are typically based on a small number of variables and struggle to capture the intricate nonlinear interactions among variables in real clinical settings. Furthermore, these models often rely on predictors that necessitate specialized knowledge or image interpretation, hindering their widespread adoption among noncardiovascular specialist clinicians [<xref ref-type="bibr" rid="ref7">7</xref>]. 
Given that HHcy is a common condition and many patients are initially diagnosed in primary or nonspecialist clinics, there is a pressing need for a predictive tool that offers predictability, universality, and interpretability to facilitate early risk assessment and intervention in clinical practice.</p><p>Machine learning technology holds potential for predicting the likelihood of diabetes, arrhythmia, and various cardiovascular diseases by effectively capturing intricate multidimensional patterns [<xref ref-type="bibr" rid="ref8">8</xref>]. Ensemble models, such as light gradient boosting machine (LightGBM) and extreme gradient boosting (XGBoost), excel in automatically discerning key features and enhancing predictive accuracy [<xref ref-type="bibr" rid="ref9">9</xref>]. Nevertheless, there is a scarcity of dedicated modeling research on CHD risk among individuals with HHcy. Moreover, the clinical interpretability of numerous machine learning models remains inadequate, constraining their reliability and utility in medical settings [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>This study seeks to develop CHD risk prediction models for individuals with HHcy using real-world electronic health record data from a single center [<xref ref-type="bibr" rid="ref12">12</xref>]. The study aims to compare the performance of 7 mainstream machine learning algorithms in terms of discriminative power, calibration, and clinical net benefit. To improve clinical interpretability, the Shapley Additive Explanation (SHAP) method was used to assess model decision-making and identify key variables. Emphasis was placed on the generalizability of variable selection and form, prioritizing standardized biochemical and lifestyle indicators that are routinely accessible to avoid reliance on specific specialties or complex imaging techniques. 
The objective is to create a practical model that can be easily implemented across various clinical settings, balancing predictability and interpretability. This model aims to serve as an early risk assessment tool for high-risk HHcy populations and establish a basis for future multicenter external validation studies.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Research Materials</title><p>A comprehensive workflow of the study, including data collection, feature selection, model development, evaluation, and interpretation, is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flowchart of developing and validating machine learning&#x2013;based models for predicting coronary heart disease (CHD) risk in patients with hyperhomocysteinemia. This flowchart outlines the entire process of constructing and evaluating risk prediction models for CHD in patients with hyperhomocysteinemia. AUC: area under the receiver operating characteristic curve; CV: cross-validation, DCA: decision curve analysis; KNN: k-nearest neighbor; LightGBM: light gradient boosting machine; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80809_fig01.png"/></fig></sec><sec id="s2-2"><title>General Information</title><p>This retrospective registry study examines patients with HHcy treated at the First Affiliated Hospital of Henan University of Traditional Chinese Medicine between January 1, 2022, and July 1, 2025.</p><p>Patients were categorized into either the CHD group or the non-CHD group based on clinical diagnoses. 
Subsequent to admission, demographic features, laboratory values, medical history, familial history, and additional indicators were gathered for statistical assessment.</p></sec><sec id="s2-3"><title>Ethical Considerations</title><p>Ethical approval was obtained from the Ethics Committee of the First Affiliated Hospital of Henan University of Chinese Medicine (approval number: 2025HL-202&#x2010;01). The data used were fully deidentified, and no personally identifiable information was accessible to the researchers. The requirement for informed consent was waived by the ethics committee in accordance with national regulations, citing the retrospective nature of the study. No compensation was provided to participants.</p></sec><sec id="s2-4"><title>Diagnostic Criteria</title><p>The patient was diagnosed with stable CHD in accordance with the 2025 American College of Cardiology/American Heart Association (ACC/AHA) guideline for chronic coronary syndromes [<xref ref-type="bibr" rid="ref13">13</xref>]. 
Acute coronary syndromes were identified following the criteria outlined in the 2023 ESC and 2021 ACC/AHA guidelines for non&#x2013;ST-elevation acute coronary syndrome [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>].</p></sec><sec id="s2-5"><title>Inclusion and Exclusion Criteria</title><sec id="s2-5-1"><title>Inclusion Criteria</title><p>Participants were required to meet the diagnostic criteria for HHcy, be aged between 18 and 80 years, and have complete baseline clinical data available for analysis.</p></sec><sec id="s2-5-2"><title>Exclusion Criteria</title><p>Participants excluded from the study encompass those with comorbid severe organic conditions (eg, advanced tumors, hepatorenal insufficiency), nonatherosclerotic heart ailments (eg, congenital heart disease, myocarditis), individuals with mental illness or cognitive deficits hindering study cooperation, pregnant or lactating women, and those engaged in other clinical trials within the preceding 3 months.</p></sec></sec><sec id="s2-6"><title>Observation Indicators</title><p>This study utilized data collected through a standardized case report form, encompassing five key categories of core indicators: (1) demographics and basic information, such as age, gender, height, weight, BMI, educational level, and marital status; (2) medical history and lifestyle factors, including a history of diseases, such as hypertension, diabetes, transient ischemic attack, as well as lifestyle habits, such as persistent smoking and alcohol consumption; (3) laboratory examination indicators, comprising blood routine parameters (red blood cell, white blood cell, and platelet counts), biochemical markers (total cholesterol, triglycerides, and creatinine), and coagulation function tests (activated partial thromboplastin time [APTT], prothrombin time, and fibrinogen); (4) imaging examination indicators, specifically the presence or absence of carotid plaques as determined by ultrasound or 
computed tomography angiography; and (5) quality control measures involving the uniform extraction of all data by trained researchers through the electronic medical record system and laboratory information system. A double-person review system was implemented to ensure data accuracy. Following the integrity assessment, no missing values were identified in the variables included, thus obviating the need for imputation or data elimination procedures. The detailed data extraction workflow and quality control procedures are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-7"><title>Machine Learning Modeling and Related Statistical Methods</title><p>Statistical analysis was performed using SPSS Statistics 27. Machine learning modeling utilized R 4.5 (caret package) and Python 3.13 (scikit-learn, xgboost, lightgbm) libraries. Quantitative data were reported as mean (SD). Group differences were assessed using independent-sample 2-tailed <italic>t</italic> tests (for normal distribution) or Mann-Whitney <italic>U</italic> tests (for nonnormal distribution), based on data distribution. Categorical data were presented as n (%), with group differences analyzed using the chi-square test or Fisher exact probability method.</p></sec><sec id="s2-8"><title>Variable Screening and Dataset Division</title><p>Variable screening involved a 2-step dimensionality reduction approach utilizing &#x201C;univariate+multivariate logistic regression.&#x201D; Initially, 9 variables exhibiting significant differences (<italic>P</italic>&#x003C;.05) between the CHD and non-CHD groups were selected for the candidate pool. 
Subsequently, multivariate analysis was conducted using stepwise regression (backward method, inclusion criterion <italic>&#x03B1;</italic>=.05, exclusion criterion <italic>&#x03B1;</italic>=.10), resulting in the retention of 6 independent predictors: age, body weight, hypertension, history of continuous alcohol consumption, APTT, and carotid artery plaque. The variance inflation factors for all variables were less than 5, indicating the absence of significant multicollinearity.</p><p>The data were stratified based on the presence of CHD comorbidity, resulting in division into a training set (n=364), a validation set (n=78), and a test set (n=78) at a ratio of 7:1.5:1.5. To ensure result reproducibility, a random seed (random_state=2024) was set. The training set was used for model training and initial parameter optimization, the validation set for fine-tuning model parameters and classification threshold optimization, and the test set as an independent dataset for the final model performance assessment.</p></sec><sec id="s2-9"><title>Machine Learning Model Construction</title><p>Seven classification models were developed to forecast the risk of CHD, categorized into traditional models (logistic regression, decision tree, and k-nearest neighbors [KNNs]) and ensemble models (random forest, XGBoost, LightGBM, and stacking). Hyperparameter optimization was performed on all models using grid search (GridSearchCV) on the training set to enhance the area under the receiver operating characteristic curve (AUC) on the validation set.</p><p>Logistic regression employs L2 regularization and class weight balancing (class_weight='balanced') with a regularization strength of C=0.1. The decision tree and random forest models optimize the max_depth, which is ultimately determined to be 7, with min_samples_split set to 2 for the decision tree and n_estimators set to 50 for the random forest. 
KNN utilizes the Euclidean distance metric, with the number of neighbors ultimately set to 9.</p><p>In the context of boosting models, both XGBoost and LightGBM were configured with a scale_pos_weight of 2 to address class imbalance. Parameter optimization yielded the following settings: XGBoost (max_depth=3 and learning_rate=0.3) and LightGBM (max_depth=3 and learning_rate=0.01). The stacking model used the aforementioned 6 models as base learners, with logistic regression serving as the meta-learner. The integration of outputs was achieved through 5-fold cross-validation.</p><p>All parameter tuning details are listed in Table S2 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> to enhance reproducibility.</p></sec><sec id="s2-10"><title>Model Evaluation and Validation</title><p>A multidimensional index is used to thoroughly assess the model&#x2019;s performance, with the subsequent metrics computed on the training, validation, and test sets, respectively:</p><p>Discrimination is primarily assessed using the AUC, computed through &#x201C;sklearn.metrics.roc_auc_score.&#x201D; Additionally, metrics, such as accuracy, recall, specificity, and <italic>F</italic><sub>1</sub>-score, are documented. To provide a comprehensive evaluation of classification performance, the precision-recall (PR) curve is generated, and the PR-AUC is determined.</p><p>Calibration is assessed through a calibration curve generated using the sklearn.calibration.calibration_curve function. An optimal calibration curve closely aligns with the diagonal line. The Brier score quantifies the accuracy of predicted probabilities by measuring the mean squared error between predicted probabilities and true labels. A lower Brier score signifies more precise predicted probabilities.</p><p>We used a 10-fold cross-validation method on the training set to assess model stability, recording the mean AUC and standard deviation. 
To evaluate generalization capability, we considered the metrics of the test set as the reference standard. A significant decrease in the test set&#x2019;s AUC compared to the training set (eg, &#x0394;AUC&#x003E;0.1) suggested potential overfitting.</p></sec><sec id="s2-11"><title>Sample Imbalance Processing and Threshold Optimization</title><p>To address the 1:2 sample imbalance between the CHD and non-CHD groups, the following strategies were implemented to enhance the model&#x2019;s ability to identify the minority class (CHD group):</p><p>For models supporting weight parameters (eg, logistic regression, XGBoost, LightGBM, and random forest), class_weight='balanced' or scale_pos_weight=2 was set. Specifically, class_weight='balanced' automatically calculates class weights inversely proportional to class frequencies, amplifying the contribution of CHD samples in the loss function. Meanwhile, scale_pos_weight=2 directly sets the weight of positive samples (CHD group) to twice that of negative samples (non-CHD group), aligning with the 1:2 sample ratio and mitigating the model&#x2019;s bias toward the majority class caused by imbalance.</p><p>Although class weight adjustment balances class importance during training, the default classification threshold (0.5) may not meet the clinical demand for high sensitivity in CHD screening. Therefore, further threshold optimization was performed on the best-performing LightGBM model. Based on predicted probabilities from the validation set, candidate thresholds ranging from 0.1 to 0.6 (stepping at 0.05) were evaluated, with sensitivity, specificity, and <italic>F</italic><sub>1</sub>-score calculated for each threshold. The optimal threshold was determined by balancing these metrics&#x2014;prioritizing high sensitivity (to identify as many true CHD patients as possible) while maintaining acceptable specificity. 
This approach breaks the limitation of a fixed threshold, enabling the model to better align with the practical goal of &#x201C;reducing missed diagnoses&#x201D; in imbalanced scenarios (see Figure S1 in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p></sec><sec id="s2-12"><title>Analysis of Model Interpretability</title><p>The LightGBM model&#x2019;s optimal performance was assessed using the SHAP algorithm for both global and local interpretability analyses. Globally, the SHAP summary plot illustrated the average contribution of each variable to the model prediction, identifying key factors influencing the risk of CHD. Locally, the SHAP dependence plot depicted the functional relationship between each feature and the model output, highlighting the positive and negative effects of variables, such as age and APTT, on the predicted value. Additionally, a Pearson correlation heatmap was generated to assess collinearity among features, confirming that no strong correlations (<italic>r</italic>&#x003C;0.5) existed among the model variables. This analysis ensured the model&#x2019;s stability and bolstered the credibility of its interpretation.</p></sec><sec id="s2-13"><title>Sample Size Estimation</title><p>Based on the recommendations by van Smeden et al [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>] and Riley et al [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], we estimated the minimum sample size required to develop a reliable multivariable prediction model using 5 candidate predictors. Assuming an outcome prevalence of approximately 30%, a shrinkage factor of 0.9, and an expected Cox-Snell <italic>R</italic>&#x00B2; of 0.15, the minimum required sample size was calculated to be approximately 218. 
This study developed models with a larger sample size, thus mitigating the potential for major overestimation bias based on these parameters.</p></sec><sec id="s2-14"><title>External Benchmark Models</title><p>For comparative purposes, Framingham risk score [<xref ref-type="bibr" rid="ref20">20</xref>] and the ACC/AHA pooled cohort equations (ASCVD score) [<xref ref-type="bibr" rid="ref21">21</xref>] were calculated for all patients using baseline clinical parameters. These scores were included as benchmark models to evaluate the incremental value of the model.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Comparison of Clinical Data Between the 2 Groups</title><p>This study comprised 520 patients with HHcy, with 174 in the CHD group and 346 in the non-CHD group, resulting in a CHD detection rate of 33.46%. The analysis of baseline data revealed significant differences in 9 variables between the 2 groups, all with <italic>P</italic>&#x003C;.05, as detailed in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of baseline characteristics between patients with hyperhomocysteinemia with and without coronary heart disease (CHD)<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Characteristic</td><td align="left" valign="bottom">Control group (n=346)</td><td align="left" valign="bottom">Case group (n=174)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="5">Demographic and general information</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age (y), mean (SD)</td><td align="left" valign="top">65.30 (10.36)</td><td align="left" valign="top">70.49 
(8.76)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sex, n (%)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">263 (0.71)</td><td align="left" valign="top">107 (0.29)</td><td align="left" valign="top" rowspan="2"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">83 (0.55)</td><td align="left" valign="top">67 (0.45)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Height (cm), mean (SD)</td><td align="left" valign="top">167.89 (7.29)</td><td align="left" valign="top">167.62 (7.30)</td><td align="left" valign="top">.78</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Weight (kg), mean (SD)</td><td align="left" valign="top">66.96 (10.60)</td><td align="left" valign="top">71.52 (9.91)</td><td align="left" valign="top">.04<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>BMI, mean (SD)</td><td align="left" valign="top">24.73 (3.22)</td><td align="left" valign="top">25.12 (3.06)</td><td align="left" 
valign="top">.07</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Education level, n (%)</td><td align="left" valign="top">.86</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Junior high school or below</td><td align="left" valign="top">224 (0.66)</td><td align="left" valign="top">114 (0.34)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High school and above</td><td align="left" valign="top">122 (0.67)</td><td align="left" valign="top">60 (0.33)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Household annual income (RMB 10,000 yuan<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup>), n (%)</td><td align="left" valign="top">.13</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003C;8</td><td align="left" valign="top">280 (0.67)</td><td align="left" valign="top">150 (0.33)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x2003;&#x2265;</named-content>8</td><td align="left" valign="top">66 (0.73)</td><td align="left" valign="top">24 (0.27)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" 
colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Occupation in the 6 months prior to onset, n (%)</td><td align="left" valign="top">.66</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mental workers</td><td align="left" valign="top">43 (0.64)</td><td align="left" valign="top">24 (0.36)</td><td align="left" valign="top" rowspan="2"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Physical workers</td><td align="left" valign="top">303 (0.74)</td><td align="left" valign="top">105 (0.26)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Payment type, n (%)</td><td align="left" valign="top">.12</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Basic medical care</td><td align="left" valign="top">232 (0.69)</td><td align="left" valign="top">102 (0.31)</td><td align="left" valign="top" rowspan="2"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Urban medical care</td><td align="left" valign="top">113 (0.61)</td><td align="left" valign="top">72 (0.39)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Marital status, n (%)</td><td align="left" 
valign="top">.65</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Married</td><td align="left" valign="top">333 (0.67)</td><td align="left" valign="top">166 (0.33)</td><td align="left" valign="top" rowspan="2"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Separated</td><td align="left" valign="top">13 (0.62)</td><td align="left" valign="top">8 (0.38)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Body temperature</td><td align="left" valign="top">36.46 (0.29)</td><td align="left" valign="top">36.45 (0.22)</td><td align="left" valign="top">.85</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Respiratory rate</td><td align="left" valign="top">18.95 (4.67)</td><td align="left" valign="top">18.68 (1.76)</td><td align="left" valign="top">.82</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heart rate</td><td align="left" valign="top">77.14 (12.62)</td><td align="left" valign="top">77.41 (12.64)</td><td align="left" valign="top">.88</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Systolic blood pressure</td><td align="left" valign="top">143.87 (21.69)</td><td align="left" valign="top">141.49 (21.09)</td><td align="left" valign="top">.16</td></tr><tr><td align="left" valign="top" colspan="2"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diastolic blood pressure</td><td align="left" valign="top">84.19 (13.92)</td><td align="left" valign="top">81.67 (14.37)</td><td align="left" valign="top">.05</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Medical history, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hypertension</td><td align="left" valign="top">209 (0.61)</td><td align="left" valign="top">134 (0.39)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diabetes mellitus</td><td align="left" valign="top">176 (0.70)</td><td align="left" valign="top">76 (0.30)</td><td align="left" valign="top">.14</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hyperlipidemia</td><td align="left" valign="top">79 (0.70)</td><td align="left" valign="top">36 (0.30)</td><td align="left" valign="top">.58</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>History of TIA<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">20 (0.56)</td><td align="left" valign="top">16 (0.44)</td><td 
align="left" valign="top">.15</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Continuous smoking history</td><td align="left" valign="top">135 (0.75)</td><td align="left" valign="top">44 (0.25)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Continuous drinking history</td><td align="left" valign="top">82 (0.78)</td><td align="left" valign="top">20 (0.22)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Carotid plaque</td><td align="left" valign="top">275 (0.64)</td><td align="left" valign="top">154 (0.36)</td><td align="left" valign="top">.01<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="5"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Blood biochemical indicators, mean (SD)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Red blood cell count</td><td align="left" valign="top">4.46 (0.48)</td><td align="left" valign="top">4.43 (0.52)</td><td align="left" valign="top">.32</td></tr><tr><td align="left" valign="top" colspan="2"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>White blood cell count</td><td align="left" valign="top">6.90 (4.28)</td><td align="left" valign="top">6.61 (1.93)</td><td align="left" valign="top">.80</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Platelet count</td><td align="left" valign="top">207.82 (60.41)</td><td align="left" valign="top">211.49 (53.30)</td><td align="left" valign="top">.32</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hemoglobin concentration</td><td align="left" valign="top">141.76 (29.06)</td><td align="left" valign="top">143.21 (38.77)</td><td align="left" valign="top">.15</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total cholesterol</td><td align="left" valign="top">4.42 (1.27)</td><td align="left" valign="top">4.39 (1.24)</td><td align="left" valign="top">.63</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Triglycerides</td><td align="left" valign="top">1.73 (1.59)</td><td align="left" valign="top">1.65 (1.10)</td><td align="left" valign="top">.39</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content> Low-density lipoprotein</td><td align="left" valign="top">2.63 (0.87)</td><td align="left" valign="top">2.53 (0.98)</td><td align="left" valign="top">.11</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High-density lipoprotein</td><td align="left" valign="top">1.14 (0.29)</td><td align="left" valign="top">1.20 (0.30)</td><td align="left" valign="top">.06</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Prothrombin time</td><td align="left" valign="top">11.37 (2.76)</td><td align="left" valign="top">10.86 (2.96)</td><td align="left" valign="top">.84</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fibrinogen content</td><td align="left" valign="top">3.15 (0.95)</td><td align="left" valign="top">3.19 (0.75)</td><td align="left" valign="top">.11</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Activated partial thromboplastin time</td><td align="left" valign="top">30.52 (4.95)</td><td align="left" valign="top">29.44 (3.82)</td><td align="left" valign="top">.01<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Thrombin time</td><td align="left" valign="top">18.67 (64.39)</td><td align="left" valign="top">15.23 (1.89)</td><td align="left" valign="top">.34</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Glycated hemoglobin</td><td align="left" valign="top">6.50 (1.63)</td><td align="left" valign="top">6.44 (1.28)</td><td align="left" valign="top">.17</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Creatinine</td><td align="left" valign="top">74.44 (70.07)</td><td align="left" valign="top">65.23 (21.57)</td><td align="left" valign="top">.04<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Uric acid</td><td align="left" valign="top">313.56 (92.76)</td><td align="left" valign="top">310.13 (90.29)</td><td align="left" valign="top">.64</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>The table includes demographic information, clinical history, lifestyle factors, and laboratory indicators for a total of 520 patients. Quantitative variables are presented as mean (SD), while categorical variables are shown as counts and percentages. 
Group differences were assessed using independent-sample <italic>t</italic> tests for normally distributed data, Mann-Whitney <italic>U</italic> tests for nonnormally distributed data, and chi-square tests for categorical data.</p></fn><fn id="table1fn2"><p><sup>b</sup>P&#x003C;.05.</p></fn><fn id="table1fn3"><p><sup>c</sup>Conversion based on an exchange rate of 1 US $&#x2248;7 RMB.</p></fn><fn id="table1fn4"><p><sup>d</sup>TIA: transient ischemic attack.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Univariate Regression Analysis of Risk Factors for CHD in HHcy Patients</title><p>Nine statistically significant factors were selected as independent variables for logistic regression analysis to predict the occurrence of CHD. The results revealed that 6 variables&#x2014;age, weight, hypertension, continuous drinking history, APTT, and carotid plaque&#x2014;significantly influenced the occurrence of CHD (all <italic>P</italic>&#x003C;.05), as shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Logistic regression analysis of risk factors for coronary heart disease (CHD) in patients with hyperhomocysteinemia.<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Factor</td><td align="left" valign="bottom">OR<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="bottom">SE</td><td align="left" valign="bottom">Wald statistics</td><td align="left" valign="bottom"><italic>P</italic> value</td><td align="left" valign="bottom" colspan="2">95% CI</td></tr></thead><tbody><tr><td align="left" valign="top">Age</td><td align="left" valign="top">1.06</td><td align="left" valign="top">0.01</td><td align="left" valign="top">22.68</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" 
colspan="2">1.03-1.08</td></tr><tr><td align="left" valign="top">Gender</td><td align="left" valign="top">0.69</td><td align="left" valign="top">0.26</td><td align="left" valign="top">2.12</td><td align="left" valign="top">.15</td><td align="left" valign="top" colspan="2">0.42-1.14</td></tr><tr><td align="left" valign="top">Weight</td><td align="left" valign="top">1.04</td><td align="left" valign="top">0.01</td><td align="left" valign="top">13.90</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.02-1.06</td></tr><tr><td align="left" valign="top">Hypertension</td><td align="left" valign="top">1.99</td><td align="left" valign="top">0.23</td><td align="left" valign="top">9.26</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top" colspan="2">1.28-3.09</td></tr><tr><td align="left" valign="top">Continuous smoking history</td><td align="left" valign="top">0.90</td><td align="left" valign="top">0.27</td><td align="left" valign="top">0.14</td><td align="left" valign="top">.71</td><td align="left" valign="top" colspan="2">0.53-1.54</td></tr><tr><td align="left" valign="top">Continuous drinking history</td><td align="left" valign="top">0.51</td><td align="left" valign="top">0.33</td><td align="left" valign="top">4.03</td><td align="left" valign="top">.045</td><td align="left" valign="top" colspan="2">0.27-0.99</td></tr><tr><td align="left" valign="top">APTT<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.02</td><td align="left" valign="top">5.82</td><td align="left" valign="top">.02</td><td align="left" valign="top" colspan="2">0.90-0.99</td></tr><tr><td align="left" valign="top">Cr<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.99</td><td align="left" valign="top">0.01</td><td align="left" valign="top">1.96</td><td align="left" valign="top">.16</td><td align="left" 
valign="top" colspan="2">0.98-1.00</td></tr><tr><td align="left" valign="top">Carotid plaque</td><td align="left" valign="top">1.79</td><td align="left" valign="top">0.29</td><td align="left" valign="top">3.99</td><td align="left" valign="top">.046</td><td align="left" valign="top" colspan="2">1.01-3.18</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>The table presents the outcomes of univariate logistic regression analysis that pinpoints factors linked to the incidence of coronary heart disease in patients with hyperhomocysteinemia. It encompasses odds ratios, SE, Wald statistics, and 95% CI for each factor. </p></fn><fn id="table2fn2"><p><sup>b</sup>OR: odds ratio.</p></fn><fn id="table2fn3"><p><sup>c</sup>APTT: activated partial thromboplastin time.</p></fn><fn id="table2fn4"><p><sup>d</sup>Cr: creatinine.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Establishment and Evaluation of 7 Machine Learning Models for CHD Risk Prediction in Patients With HHcy</title><p>This study utilized 7 machine learning algorithms, namely logistic regression, XGBoost, LightGBM, random forest, decision tree, KNNs, and stacking ensemble, to develop risk prediction models for CHD in patients with HHcy based on 6 core predictors: age, weight, hypertension, continuous drinking history, APTT, and carotid plaque. In the training set assessment, each model exhibited distinct performance characteristics. Notably, the decision tree, random forest, and XGBoost models achieved perfect discrimination (AUC=1.000) and classification metrics (accuracy, sensitivity, specificity, and <italic>F</italic><sub>1</sub>-score=1.000), indicating potential overfitting, unlike the logistic regression and KNN models. The LightGBM model displayed strong performance with an AUC of 0.987, accuracy of 0.854, and <italic>F</italic><sub>1</sub>-score of 0.818. 
Conversely, the stacking ensemble model yielded an AUC of 0.933, accuracy of 0.810, and <italic>F</italic><sub>1</sub>-score of 0.660, demonstrating inferior performance compared to the aforementioned potentially overfitting models.</p><p>During the validation phase, model performance varied notably among different algorithms. The XGBoost model exhibited the highest AUC of 0.802 and an <italic>F</italic><sub>1</sub>-score of 0.689. The stacking model outperformed others with an AUC of 0.800, the highest accuracy of 0.769, and an <italic>F</italic><sub>1</sub>-score of 0.625. The LightGBM model achieved an AUC of 0.780 and an <italic>F</italic><sub>1</sub>-score of 0.648. In comparison, the logistic model yielded an AUC of 0.755 and an <italic>F</italic><sub>1</sub>-score of 0.639. The random forest model attained an AUC of 0.777; however, it demonstrated low sensitivity at 0.355 and an <italic>F</italic><sub>1</sub>-score of 0.489. The decision tree model, with an AUC of 0.679 and an <italic>F</italic><sub>1</sub>-score of 0.613, displayed limited generalization ability and was notably impacted by overfitting during training.</p><p>During testing, the LightGBM model demonstrated the highest AUC of 0.807, a sensitivity of 0.913, and an <italic>F</italic><sub>1</sub>-score of 0.636, indicating promising performance. The logistic model yielded an AUC of 0.796, an <italic>F</italic><sub>1</sub>-score of 0.603, a sensitivity of 0.826, and a specificity of 0.618, striking a balance between prediction accuracy and clinical relevance. In contrast, the stacking model achieved an AUC of 0.802 but exhibited lower sensitivity at 0.478 and an <italic>F</italic><sub>1</sub>-score of 0.564, suggesting limited generalizability. Although the random forest model achieved the highest accuracy at 0.769, its <italic>F</italic><sub>1</sub>-score was 0.571. 
The XGBoost model&#x2019;s performance notably decreased, with an AUC of 0.757 and an <italic>F</italic><sub>1</sub>-score of 0.523 (<xref ref-type="table" rid="table3">Table 3</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Performance metrics of the 7 machine learning models for predicting coronary heart disease (CHD) risk in patients with hyperhomocysteinemia across different datasets.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model and stage</td><td align="left" valign="top">AUC<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">Accuracy</td><td align="left" valign="top">Sensitivity</td><td align="left" valign="top">Specificity</td><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="top">Training</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Decision tree</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>KNN<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="char" char="." valign="top">0.795</td><td align="char" char="." valign="top">0.734</td><td align="char" char="." valign="top">0.475</td><td align="char" char="." valign="top">0.861</td><td align="char" char="." 
valign="top">0.540</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LightGBM<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="char" char="." valign="top">0.987</td><td align="char" char="." valign="top">0.854</td><td align="char" char="." valign="top">0.992</td><td align="char" char="." valign="top">0.787</td><td align="char" char="." valign="top">0.818</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Logistic</td><td align="char" char="." valign="top">0.701</td><td align="char" char="." valign="top">0.646</td><td align="char" char="." valign="top">0.683</td><td align="char" char="." valign="top">0.627</td><td align="char" char="." valign="top">0.560</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random forest</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stacking</td><td align="char" char="." valign="top">0.933</td><td align="char" char="." valign="top">0.810</td><td align="char" char="." valign="top">0.558</td><td align="char" char="." valign="top">0.934</td><td align="char" char="." valign="top">0.660</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." valign="top">1.000</td><td align="char" char="." 
valign="top">1.000</td><td align="char" char="." valign="top">1.000</td></tr><tr><td align="left" valign="top">Validation</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Decision tree</td><td align="char" char="." valign="top">0.679</td><td align="char" char="." valign="top">0.692</td><td align="char" char="." valign="top">0.613</td><td align="char" char="." valign="top">0.745</td><td align="char" char="." valign="top">0.613</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>KNN</td><td align="char" char="." valign="top">0.751</td><td align="char" char="." valign="top">0.731</td><td align="char" char="." valign="top">0.452</td><td align="char" char="." valign="top">0.915</td><td align="char" char="." valign="top">0.571</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LightGBM</td><td align="char" char="." valign="top">0.780</td><td align="char" char="." valign="top">0.679</td><td align="char" char="." valign="top">0.742</td><td align="char" char="." valign="top">0.638</td><td align="char" char="." valign="top">0.648</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Logistic</td><td align="char" char="." valign="top">0.755</td><td align="char" char="." valign="top">0.667</td><td align="char" char="." valign="top">0.742</td><td align="char" char="." valign="top">0.617</td><td align="char" char="." valign="top">0.639</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random forest</td><td align="char" char="." 
valign="top">0.777</td><td align="char" char="." valign="top">0.705</td><td align="char" char="." valign="top">0.355</td><td align="char" char="." valign="top">0.936</td><td align="char" char="." valign="top">0.489</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stacking</td><td align="char" char="." valign="top">0.800</td><td align="char" char="." valign="top">0.769</td><td align="char" char="." valign="top">0.484</td><td align="char" char="." valign="top">0.957</td><td align="char" char="." valign="top">0.625</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="char" char="." valign="top">0.802</td><td align="char" char="." valign="top">0.756</td><td align="char" char="." valign="top">0.677</td><td align="char" char="." valign="top">0.809</td><td align="char" char="." valign="top">0.689</td></tr><tr><td align="left" valign="top">Testing</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Decision Tree</td><td align="char" char="." valign="top">0.656</td><td align="char" char="." valign="top">0.615</td><td align="char" char="." valign="top">0.696</td><td align="char" char="." valign="top">0.582</td><td align="char" char="." valign="top">0.516</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>KNN</td><td align="char" char="." valign="top">0.768</td><td align="char" char="." valign="top">0.744</td><td align="char" char="." valign="top">0.565</td><td align="char" char="." valign="top">0.818</td><td align="char" char="." 
valign="top">0.565</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LightGBM</td><td align="char" char="." valign="top">0.807</td><td align="char" char="." valign="top">0.692</td><td align="char" char="." valign="top">0.913</td><td align="char" char="." valign="top">0.600</td><td align="char" char="." valign="top">0.636</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Logistic</td><td align="char" char="." valign="top">0.796</td><td align="char" char="." valign="top">0.679</td><td align="char" char="." valign="top">0.826</td><td align="char" char="." valign="top">0.618</td><td align="char" char="." valign="top">0.603</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random Forest</td><td align="char" char="." valign="top">0.765</td><td align="char" char="." valign="top">0.769</td><td align="char" char="." valign="top">0.522</td><td align="char" char="." valign="top">0.873</td><td align="char" char="." valign="top">0.571</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stacking</td><td align="char" char="." valign="top">0.802</td><td align="char" char="." valign="top">0.782</td><td align="char" char="." valign="top">0.478</td><td align="char" char="." valign="top">0.909</td><td align="char" char="." valign="top">0.564</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="char" char="." valign="top">0.757</td><td align="char" char="." valign="top">0.603</td><td align="char" char="." valign="top">0.739</td><td align="char" char="." valign="top">0.545</td><td align="char" char="." 
valign="top">0.523</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p></fn><fn id="table3fn2"><p><sup>b</sup>KNN: k-nearest neighbors.</p></fn><fn id="table3fn3"><p><sup>c</sup>LightGBM: light gradient boosting machine.</p></fn><fn id="table3fn4"><p><sup>d</sup>XGBoost: extreme gradient boosting.</p></fn></table-wrap-foot></table-wrap><p>In conclusion, both the LightGBM and logistic regression models demonstrated consistent performance across the validation and testing phases, indicating their heightened clinical utility.</p><p>The performance of the 7 machine learning models (decision tree, KNN, LightGBM, logistic regression, random forest, stacking, and XGBoost) in predicting CHD in patients with hyperhomocysteinemia was evaluated. Performance metrics, such as area under the receiver operating characteristic (ROC) curve (AUC), accuracy, sensitivity, specificity, and <italic>F</italic><sub>1</sub>-score, were assessed on the training, validation, and test sets. The results indicate consistent discriminative ability and stability across different data partitions, with LightGBM demonstrating superior performance in the test set (AUC=0.807, <italic>F</italic><sub>1</sub>-score=0.636).</p></sec><sec id="s3-4"><title>Model Performance Evaluation</title><p>Upon analyzing the ROC curves, it was evident that within the training set, the random forest (AUC=0.961), XGBoost (AUC=0.953), stacking (AUC=0.933), and LightGBM (AUC=0.890) models displayed exceptional discriminatory capabilities, indicating potential overfitting. Conversely, the logistic regression model exhibited a comparatively lower AUC of 0.697, while the KNN and decision tree models achieved AUC values of 0.776 and 0.833, respectively (<xref ref-type="fig" rid="figure2">Figure 2A</xref>). 
In the validation set, the stacking model maintained superior performance (AUC=0.800), closely followed by LightGBM (AUC=0.796), XGBoost (AUC=0.792), and random forest (AUC=0.792). The logistic model also demonstrated satisfactory performance (AUC=0.740), surpassing the decision tree (AUC=0.734) and KNN (AUC=0.764) models (<xref ref-type="fig" rid="figure2">Figure 2B</xref>). Within the testing set, the LightGBM (AUC=0.807) and stacking (AUC=0.802) models achieved the highest AUC values, trailed by KNN (AUC=0.768), random forest (AUC=0.765), and XGBoost (AUC=0.757). The logistic regression model also displayed robust discriminatory ability (AUC=0.796), while the decision tree model exhibited the lowest AUC (0.656; <xref ref-type="fig" rid="figure2">Figure 2C</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Receiver operating characteristic (ROC) curves of coronary heart disease (CHD) risk prediction models constructed by 7 machine learning algorithms in patients with hyperhomocysteinemia. ROC curves were utilized to assess the discriminative performance of 7 distinct machine learning models, including logistic regression, k-nearest neighbors (KNN), decision tree, random forest, extreme gradient boosting (XGBoost), light gradient boosting machine (LightGBM), and stacking, on various datasets: the training set, validation set, and test set, denoted as (A), (B), and (C), respectively. The area under the receiver operating characteristic curve (AUC) was employed as a metric to quantify the models&#x2019; performance, where increased values are indicative of enhanced capability in discriminating between individuals with CHD and those without.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80809_fig02.png"/></fig><p>Upon analyzing the PR curves, distinct variations in model efficacy were evident across the three datasets. 
In the training dataset, the random forest model exhibited the highest PR-AUC value of 0.904, outperforming XGBoost (0.890), stacking (0.853), and LightGBM (0.770), underscoring its robust classification capability on the internal data. Conversely, the Logistic model displayed limited discriminatory power with an AUC of 0.494, while the decision tree and KNN models achieved PR-AUCs of 0.699 and 0.645, respectively (<xref ref-type="fig" rid="figure3">Figure 3A</xref>).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Precision-recall (PR) curves of coronary heart disease (CHD) risk prediction models constructed by the 7 machine learning algorithms. PR curves evaluate model performance in imbalanced datasets, emphasizing precision (positive predictive value) and recall (sensitivity). The results for the training set are displayed in (A), the validation set in (B), and the test set in (C). The PR-AUC (area under the precision-recall curve) is reported for each model, with greater values denoting better classification performance for the positive class (CHD). KNN: k-nearest neighbor; LightGBM: light gradient boosting machine; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80809_fig03.png"/></fig><p>In the validation dataset, the LightGBM model demonstrated the highest PR-AUC of 0.750, followed closely by stacking (0.748) and KNN (0.741). The XGBoost and random forest models performed comparably with AUCs of 0.740 and 0.697, respectively, whereas the logistic model yielded a PR-AUC of 0.667, and the decision tree model slightly lagged behind with an AUC of 0.698 (<xref ref-type="fig" rid="figure3">Figure 3B</xref>).</p><p>In the testing dataset, the LightGBM (AUC=0.596) and stacking (AUC=0.604) models exhibited moderate PR performance. The logistic model followed with an AUC of 0.607. 
Conversely, XGBoost (0.567), random forest (0.557), and KNN (0.544) demonstrated slightly lower PR-AUCs, with the decision tree model displaying the weakest performance (AUC=0.364; <xref ref-type="fig" rid="figure3">Figure 3C</xref>).</p></sec><sec id="s3-5"><title>ROC Curve Analysis of Risk Prediction Models Based on 10-Fold Cross-Validation</title><p>ROC analysis using 10-fold cross-validation was used to assess the efficacy of 3 leading models: logistic regression, LightGBM, and stacking. As depicted in <xref ref-type="fig" rid="figure4">Figure 4A</xref>, during the training phase, the LightGBM model demonstrated the highest average AUC of 0.691, marginally surpassing logistic regression (AUC=0.679) and stacking (AUC=0.677), underscoring its superior discriminatory capability. Additionally, as illustrated in <xref ref-type="fig" rid="figure4">Figure 4B</xref>, following hyperparameter optimization, LightGBM&#x2019;s performance was further bolstered, with its mean AUC escalating from 0.689 (0.025) to 0.701 (0.074). This enhancement substantiates LightGBM&#x2019;s resilience across iterations. Consequently, LightGBM was ultimately designated as the preferred model for subsequent analyses.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Receiver operating characteristic (ROC) curve analysis of risk prediction models based on 10-fold cross-validation. (A) ROC curves were generated for the 3 best-performing models, namely logistic regression, light gradient boosting machine (LightGBM), and stacking, through 10-fold cross-validation on the training dataset. LightGBM demonstrated the highest average area under the receiver operating characteristic curve (AUC) of 0.691, with logistic regression and stacking following closely at 0.679 and 0.677, respectively. (B) A comparison of LightGBM performance prehyperparameter and posthyperparameter optimization revealed enhancements. 
The optimized model exhibited a higher mean AUC of 0.701 (SD 0.074), surpassing the original model&#x2019;s performance of 0.689 (SD 0.025), thus validating its increased stability.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80809_fig04.png"/></fig></sec><sec id="s3-6"><title>Calibration Curves and Brier Score Analysis</title><p>Calibration curves and Brier scores were utilized to assess the alignment between predicted probabilities and observed outcomes across various datasets. In the training set (<xref ref-type="fig" rid="figure5">Figure 5A</xref>), all models exhibited moderate calibration performance. The KNN model displayed the lowest Brier score (0.2134), closely trailed by random forest (0.240), logistic regression (0.2482), and XGBoost (0.2487). Within the validation set (<xref ref-type="fig" rid="figure5">Figure 5B</xref>), random forest exhibited favorable calibration (Brier score=0.2614), with XGBoost (0.2662) and KNN (0.2758) also demonstrating relatively strong performance. In the test set (<xref ref-type="fig" rid="figure5">Figure 5C</xref>), logistic regression demonstrated the most accurate calibration (Brier score=0.2507), followed by XGBoost (0.2737) and KNN (0.2629). While the Brier scores indicated acceptable calibration for multiple models, no single model consistently outperformed others across all 3 datasets. Nevertheless, XGBoost and KNN showcased consistent and favorable calibration performance.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Calibration curves and Brier scores of 6 machine learning models. Calibration curves were utilized to evaluate the concordance between predicted probabilities and observed outcomes of coronary heart disease (CHD) across 3 sets: the training set, validation set, and test set. 
The diagonal dotted line in the graphs symbolizes ideal calibration, with Brier scores (inset) serving as a measure of prediction accuracy (lower scores indicating improved calibration). Both extreme gradient boosting (XGBoost) and k-nearest neighbors (KNN) models demonstrated consistent calibration performance across all datasets.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80809_fig05.png"/></fig></sec><sec id="s3-7"><title>Clinical Utility Evaluation Based on Decision Curve Analysis</title><p>Decision curve analysis was used to assess the clinical efficacy of various machine learning models on the training, validation, and test sets. <xref ref-type="fig" rid="figure6">Figure 6</xref> illustrates that the KNN model yielded the highest average net benefit across all 3 datasets (training: 0.2984, validation: 0.2974, and test: 0.281), closely trailed by LightGBM (training: 0.3262, validation: 0.264, and test: 0.2415) and XGBoost (train: 0.3084, validation: 0.2615, and test: 0.2383). These models consistently outperformed the &#x201C;Treat None&#x201D; and &#x201C;Treat All&#x201D; strategies in terms of net benefit over a broad range of probability thresholds. Notably, LightGBM and KNN consistently demonstrated superior clinical utility, particularly within the 0.3&#x2010;0.7 threshold range, suggesting their potential for guiding personalized preventive interventions in practical healthcare settings.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Decision curve analysis (DCA) of 6 machine learning models. The clinical utility of models is assessed by DCA through the calculation of net benefit at different probability thresholds in the training, validation, and test sets. Net benefit is determined as the difference between the proportion of correctly identified cases of coronary heart disease (CHD) and the proportion of unnecessary interventions. 
K-nearest neighbors (KNN), light gradient boosting machine (LightGBM), and extreme gradient boosting (XGBoost) consistently exhibited superior net benefit compared to strategies of treating all or treating none, especially within the 0.3&#x2010;0.7 threshold range.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80809_fig06.png"/></fig></sec><sec id="s3-8"><title>SHAP-Based Interpretation of the Optimal LightGBM Model</title><p>To enhance the interpretability of the LightGBM model, SHAP values were utilized to assess the significance of individual features on the model&#x2019;s predictions. The feature importance summary plot, derived from the mean absolute SHAP values, identified age, APTT, hypertension, weight, carotid plaque, and continuous drinking history as the primary predictors influencing the risk of CHD in patients with HHcy (<xref ref-type="fig" rid="figure7">Figure 7A</xref>). Notably, age and APTT exhibited the most substantial average impact, underscoring their pivotal roles in the model&#x2019;s predictive capabilities.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Shapley Additive Explanation (SHAP)&#x2013;based interpretation and feature correlation analysis of the light gradient boosting machine (LightGBM) model. (A) A SHAP summary plot displays the average absolute impact of individual features on the prediction of coronary heart disease (CHD) risk, highlighting age and activated partial thromboplastin time (APTT) as the most influential factors. (B) A SHAP dependence plot illustrates the direction (positive or negative) and strength of feature effects on the model&#x2019;s output, indicating that higher age and APTT levels are associated with an increased risk of CHD. 
(C) A Pearson correlation heatmap of key features demonstrates minimal multicollinearity (|<italic>r</italic>|&#x003C;0.5 for all features except continuous drinking or smoking history, where <italic>r</italic>=0.54), thereby ensuring the stability of the model. Cr: creatinine.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80809_fig07.png"/></fig><p>The SHAP dependence plot (<xref ref-type="fig" rid="figure7">Figure 7B</xref>) illustrates the impact of each feature on the model output in terms of both direction and magnitude. The elevated values of age and APTT were consistently linked to a higher predicted risk of CHD, with hypertension and carotid plaque presence also notably increasing the SHAP value. Notably, the influence of continuous drinking history exhibited variability across individual patient profiles, suggesting potential interactions with other variables.</p><p>A feature correlation heatmap (<xref ref-type="fig" rid="figure7">Figure 7C</xref>) was generated to evaluate multicollinearity and the interplay among essential features. The heatmap reveals a moderate positive correlation between continuous drinking history and continuous smoking history (<italic>r</italic>=0.54) and a slight negative correlation between age and weight (<italic>r</italic>=&#x2013;0.27). These findings endorse the incorporation of these variables into the LightGBM model without introducing unnecessary redundancy.</p><p>These results demonstrate that the LightGBM model offers both superior predictive accuracy and the ability to reveal the significance and interplay of clinical features. 
This capability can enhance personalized risk evaluation and targeted preventive measures for coronary heart disease in individuals with HHcy.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>This research conducted a comparative analysis of 7 machine learning algorithms to forecast the likelihood of CHD in patients with HHcy using authentic single-center electronic health records. The models were developed utilizing 6 fundamental variables (age, weight, hypertension, history of sustained alcohol consumption, APTT, and carotid artery plaque), and their discriminatory capacity, calibration, and clinical applicability were thoroughly assessed across training, validation, and test datasets. The findings revealed that the LightGBM model exhibited superior overall performance, demonstrating the highest AUC (0.807) and a relatively elevated <italic>F</italic><sub>1</sub>-score (0.636) in the test set, underscoring its promising clinical predictive capability.</p><p>A significant discovery is that the model does not depend on variables specific to CHD or HHcy. Instead, it opts for indicators that are commonly available and extensively standardized in routine clinical settings, including coagulation function (activated partial thromboplastin time), arterial structure markers (presence of carotid artery plaques), basal metabolism (body weight), and behavioral aspects (alcohol consumption history). These variables do not necessitate expert image analysis or sophisticated molecular assays, thereby augmenting the model&#x2019;s practicality in primary health care facilities or noncardiovascular disciplines.</p><p>Upon applying the SHAP algorithm to interpret the model, we validated that age and APTT are the most impactful variables within the model. Notably, the predictive significance of APTT warrants special consideration. 
Serving as a marker for the intrinsic coagulation pathway, it could potentially signify a hypercoagulable state within the HHcy population. This pathological characteristic has been identified as a key factor contributing to accelerated atherosclerosis [<xref ref-type="bibr" rid="ref22">22</xref>]. The SHAP dependence plot and correlation heatmap provided additional confirmation of the minimal collinearity among variables, thereby augmenting the interpretive clarity and reliability of the model.</p><p>The stacking model exhibits a high AUC of 0.800 in the validation set; however, its <italic>F</italic><sub>1</sub>-score decreases to 0.33 in the test set, with a sensitivity of only 0.26, indicating limited generalization ability and a predisposition toward overfitting. Conversely, the LightGBM model demonstrates consistent performance across the training, validation, and test sets, underscoring its robustness in diverse sample scenarios. While the logistic regression model shows a slightly lower AUC, its balanced <italic>F</italic><sub>1</sub>-score, sensitivity, and specificity underscore its reliability in terms of generalization stability.</p><p>In this study, we aim to streamline variable processing by avoiding artificial binning, segmentation, high-dimensional mapping transformations, and the creation of interaction terms. This approach is intended to enhance the model&#x2019;s generalizability across different datasets by minimizing reliance on specific data structures. Additionally, we refrain from using oversampling methods, such as synthetic minority over-sampling technique, to mitigate issues related to feature distribution drift resulting from synthetic sample generation. Instead, we tackle data imbalance by adjusting category weights and optimizing thresholds.</p><p>However, this study is subject to several limitations. 
First, the data were sourced from a singular medical institution, potentially introducing regional, health care&#x2013;seeking motivation, or detection biases. For instance, individuals with high homocysteine levels who consented to coagulation function tests may exhibit distinct health awareness and disease profiles compared to those who declined the tests. Second, the retrospective design of the study resulted in the exclusion of crucial variables that could impact CHD risk, such as dietary patterns, nutritional status, and renal function. Consequently, the interpretation of changes in serum markers remains inconclusive. Furthermore, the study solely predicted the present CHD status of patients documented in electronic health records. The absence of longitudinal follow-up data precluded the modeling of event timing or disease progression. To address these limitations, future research should integrate prospective cohorts and long-term follow-up data to validate the model&#x2019;s reliability and establish a dynamic risk assessment framework.</p><p>In conclusion, the LightGBM model developed in this study exhibits exceptional predictive accuracy and interpretability in individuals with HHcy, offering a robust basis for real-world implementation. We propose its consideration as a potential tool for early CHD risk stratification to aid clinicians in promptly identifying high-risk patients. 
Subsequent investigations may concentrate on external validation and model refinement in a multicenter context.</p></sec></body><back><notes><sec><title>Funding</title><p>This research was supported by the National Natural Youth Science Foundation of China (grant numbers 82205021 and 82405290) and the Science and Technology Innovation Guidance Plan Project in the Medical and Health Field of Zhengzhou City (grant number 2024YLZDJH075).</p></sec><sec><title>Data Availability</title><p>The datasets produced and/or scrutinized in this study are not publicly accessible to safeguard patient confidentiality and comply with institutional guidelines. However, interested parties may obtain the data from the corresponding author upon request.</p></sec></notes><fn-group><fn fn-type="con"><p>The study was designed by and data analysis was supervised by MKL and MYD. Data collection was conducted by HL, HY, XL, and YL. MKL and MYD contributed to manuscript writing and revision. All authors have reviewed and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ACC/AHA</term><def><p>American College of Cardiology/American Heart Association</p></def></def-item><def-item><term id="abb2">APTT</term><def><p>activated partial thromboplastin time</p></def></def-item><def-item><term id="abb3">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb4">CHD</term><def><p>coronary heart disease</p></def></def-item><def-item><term id="abb5">HHcy</term><def><p>hyperhomocysteinemia</p></def></def-item><def-item><term id="abb6">KNN</term><def><p>k-nearest neighbor</p></def></def-item><def-item><term id="abb7">LightGBM</term><def><p>light gradient boosting machine</p></def></def-item><def-item><term id="abb8">PR</term><def><p>precision-recall</p></def></def-item><def-item><term id="abb9">ROC</term><def><p>receiver 
operating characteristic</p></def></def-item><def-item><term id="abb10">SHAP</term><def><p>Shapley Additive Explanation</p></def></def-item><def-item><term id="abb11">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gu&#x00E9;ant</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Gu&#x00E9;ant-Rodriguez</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Oussalah</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zuily</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rosenberg</surname><given-names>I</given-names> </name></person-group><article-title>Hyperhomocysteinemia in cardiovascular diseases: revisiting observational studies and clinical trials</article-title><source>Thromb Haemost</source><year>2023</year><month>03</month><volume>123</volume><issue>3</issue><fpage>270</fpage><lpage>282</lpage><pub-id pub-id-type="doi">10.1055/a-1952-1946</pub-id><pub-id pub-id-type="medline">36170884</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tinelli</surname><given-names>C</given-names> </name><name name-style="western"><surname>Di Pino</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ficulle</surname><given-names>E</given-names> </name><name name-style="western"><surname>Marcelli</surname><given-names>S</given-names> </name><name name-style="western"><surname>Feligioni</surname><given-names>M</given-names> </name></person-group><article-title>Hyperhomocysteinemia as a risk factor and potential nutraceutical target for certain pathologies</article-title><source>Front 
Nutr</source><year>2019</year><volume>6</volume><fpage>49</fpage><pub-id pub-id-type="doi">10.3389/fnut.2019.00049</pub-id><pub-id pub-id-type="medline">31069230</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Balint</surname><given-names>B</given-names> </name><name name-style="western"><surname>Jepchumba</surname><given-names>VK</given-names> </name><name name-style="western"><surname>Gu&#x00E9;ant</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Gu&#x00E9;ant-Rodriguez</surname><given-names>RM</given-names> </name></person-group><article-title>Mechanisms of homocysteine-induced damage to the endothelial, medial and adventitial layers of the arterial wall</article-title><source>Biochimie</source><year>2020</year><month>06</month><volume>173</volume><fpage>100</fpage><lpage>106</lpage><pub-id pub-id-type="doi">10.1016/j.biochi.2020.02.012</pub-id><pub-id pub-id-type="medline">32105811</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Herrero-Fernandez</surname><given-names>B</given-names> </name><name name-style="western"><surname>Gomez-Bris</surname><given-names>R</given-names> </name><name name-style="western"><surname>Somovilla-Crespo</surname><given-names>B</given-names> </name><name name-style="western"><surname>Gonzalez-Granado</surname><given-names>JM</given-names> </name></person-group><article-title>Immunobiology of atherosclerosis: a complex net of interactions</article-title><source>Int J Mol Sci</source><year>2019</year><month>10</month><day>24</day><volume>20</volume><issue>21</issue><fpage>5293</fpage><pub-id pub-id-type="doi">10.3390/ijms20215293</pub-id><pub-id pub-id-type="medline">31653058</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cetin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Raby</surname><given-names>AC</given-names> </name></person-group><article-title>Understanding atherosclerotic plaque cellular composition: recent advances driven by single cell omics</article-title><source>Cells</source><year>2025</year><month>05</month><day>23</day><volume>14</volume><issue>11</issue><fpage>770</fpage><pub-id pub-id-type="doi">10.3390/cells14110770</pub-id><pub-id pub-id-type="medline">40497946</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alaa</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Bolton</surname><given-names>T</given-names> </name><name name-style="western"><surname>Di Angelantonio</surname><given-names>E</given-names> </name><name name-style="western"><surname>Rudd</surname><given-names>JHF</given-names> </name><name name-style="western"><surname>van der Schaar</surname><given-names>M</given-names> </name></person-group><article-title>Cardiovascular disease risk prediction using automated machine learning: a prospective study of 423,604 UK Biobank participants</article-title><source>PLoS One</source><year>2019</year><volume>14</volume><issue>5</issue><fpage>e0213653</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0213653</pub-id><pub-id pub-id-type="medline">31091238</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wysocki</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fu&#x0142;ek</surname><given-names>M</given-names> </name><name name-style="western"><surname>Macek</surname><given-names>P</given-names> 
</name><etal/></person-group><article-title>Ultrasound carotid plaque score and severity of coronary artery disease assessed by computed tomography angiography in patients with arterial hypertension</article-title><source>Diagnostics (Basel)</source><year>2024</year><month>05</month><day>25</day><volume>14</volume><issue>11</issue><fpage>1101</fpage><pub-id pub-id-type="doi">10.3390/diagnostics14111101</pub-id><pub-id pub-id-type="medline">38893628</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pal</surname><given-names>P</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>HV</given-names> </name><name name-style="western"><surname>Grover</surname><given-names>V</given-names> </name><name name-style="western"><surname>Manikandan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Karimi</surname><given-names>R</given-names> </name><name name-style="western"><surname>Khishe</surname><given-names>M</given-names> </name></person-group><article-title>Interactive cardiovascular disease prediction system using learning techniques: Insights from extensive experiments</article-title><source>Results Control Optim</source><year>2025</year><month>06</month><volume>19</volume><fpage>100560</fpage><pub-id pub-id-type="doi">10.1016/j.rico.2025.100560</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hajihosseinlou</surname><given-names>M</given-names> </name><name name-style="western"><surname>Maghsoudi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ghezelbash</surname><given-names>R</given-names> </name></person-group><article-title>A novel scheme for mapping of MVT&#x2011;type Pb&#x2013;Zn prospectivity: LightGBM, a highly efficient 
gradient boosting decision tree machine learning algorithm</article-title><source>Nat Resour Res</source><year>2023</year><month>12</month><volume>32</volume><issue>6</issue><fpage>2417</fpage><lpage>2438</lpage><pub-id pub-id-type="doi">10.1007/s11053-023-10249-6</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gulum</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Trombley</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Ozen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Esen</surname><given-names>E</given-names> </name><name name-style="western"><surname>Aksamoglu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kantardzic</surname><given-names>M</given-names> </name></person-group><article-title>Why are explainable AI methods for prostate lesion detection rated poorly by radiologists?</article-title><source>Appl Sci (Basel)</source><year>2024</year><volume>14</volume><issue>11</issue><fpage>4654</fpage><pub-id pub-id-type="doi">10.3390/app14114654</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Obeagu</surname><given-names>EI</given-names> </name><name name-style="western"><surname>Ezeanya</surname><given-names>CU</given-names> </name><name name-style="western"><surname>Ogenyi</surname><given-names>FC</given-names> </name><name name-style="western"><surname>Ifu</surname><given-names>DD</given-names> </name></person-group><article-title>Big data analytics and machine learning in hematology: transformative insights, applications and challenges</article-title><source>Medicine (Baltimore)</source><year>2025</year><volume>104</volume><issue>10</issue><fpage>e41766</fpage><pub-id 
pub-id-type="doi">10.1097/MD.0000000000041766</pub-id><pub-id pub-id-type="medline">40068020</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weng</surname><given-names>SF</given-names> </name><name name-style="western"><surname>Reps</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kai</surname><given-names>J</given-names> </name><name name-style="western"><surname>Garibaldi</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Qureshi</surname><given-names>N</given-names> </name></person-group><article-title>Can machine-learning improve cardiovascular risk prediction using routine clinical data?</article-title><source>PLoS One</source><year>2017</year><volume>12</volume><issue>4</issue><fpage>e0174944</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0174944</pub-id><pub-id pub-id-type="medline">28376093</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Williams</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Levine</surname><given-names>GN</given-names> </name><name name-style="western"><surname>Kalra</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Correction to: 2025 AHA/ACC clinical performance and quality measures for patients with chronic coronary disease: a report of the American College of Cardiology/American Heart Association joint committee on performance measures</article-title><source>Circ Cardiovasc Qual Outcomes</source><year>2025</year><month>07</month><volume>18</volume><issue>7</issue><fpage>e000141</fpage><pub-id pub-id-type="doi">10.1161/HCQ.0000000000000141</pub-id><pub-id pub-id-type="medline">40663623</pub-id></nlm-citation></ref><ref 
id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Byrne</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Rossello</surname><given-names>X</given-names> </name><name name-style="western"><surname>Coughlan</surname><given-names>JJ</given-names> </name><etal/></person-group><article-title>2023 ESC Guidelines for the management of acute coronary syndromes</article-title><source>Eur Heart J</source><year>2023</year><month>10</month><day>12</day><volume>44</volume><issue>38</issue><fpage>3720</fpage><lpage>3826</lpage><pub-id pub-id-type="doi">10.1093/eurheartj/ehad191</pub-id><pub-id pub-id-type="medline">37622654</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Visseren</surname><given-names>FLJ</given-names> </name><name name-style="western"><surname>Mach</surname><given-names>F</given-names> </name><name name-style="western"><surname>Smulders</surname><given-names>YM</given-names> </name><etal/></person-group><article-title>2021 ESC Guidelines on cardiovascular disease prevention in clinical practice</article-title><source>Eur J Prev Cardiol</source><year>2022</year><month>02</month><day>19</day><volume>29</volume><issue>1</issue><fpage>5</fpage><lpage>115</lpage><pub-id pub-id-type="doi">10.1093/eurjpc/zwab154</pub-id><pub-id pub-id-type="medline">34558602</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Smeden</surname><given-names>M</given-names> </name><name name-style="western"><surname>de Groot</surname><given-names>JAH</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><etal/></person-group><article-title>No rationale for 1 
variable per 10 events criterion for binary logistic regression analysis</article-title><source>BMC Med Res Methodol</source><year>2016</year><month>11</month><day>24</day><volume>16</volume><issue>1</issue><fpage>163</fpage><pub-id pub-id-type="doi">10.1186/s12874-016-0267-3</pub-id><pub-id pub-id-type="medline">27881078</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Smeden</surname><given-names>M</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KG</given-names> </name><name name-style="western"><surname>de Groot</surname><given-names>JA</given-names> </name><etal/></person-group><article-title>Sample size for binary logistic prediction models: beyond events per variable criteria</article-title><source>Stat Methods Med Res</source><year>2019</year><month>08</month><volume>28</volume><issue>8</issue><fpage>2455</fpage><lpage>2474</lpage><pub-id pub-id-type="doi">10.1177/0962280218784726</pub-id><pub-id pub-id-type="medline">29966490</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Riley</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Snell</surname><given-names>KI</given-names> </name><name name-style="western"><surname>Ensor</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Minimum sample size for developing a multivariable prediction model: part II&#x2014;binary and time-to-event outcomes</article-title><source>Stat Med</source><year>2019</year><month>03</month><day>30</day><volume>38</volume><issue>7</issue><fpage>1276</fpage><lpage>1296</lpage><pub-id pub-id-type="doi">10.1002/sim.7992</pub-id><pub-id pub-id-type="medline">30357870</pub-id></nlm-citation></ref><ref 
id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Riley</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Snell</surname><given-names>KIE</given-names> </name><name name-style="western"><surname>Ensor</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Minimum sample size for developing a multivariable prediction model: part I&#x2014;continuous outcomes</article-title><source>Stat Med</source><year>2019</year><month>03</month><day>30</day><volume>38</volume><issue>7</issue><fpage>1262</fpage><lpage>1275</lpage><pub-id pub-id-type="doi">10.1002/sim.7993</pub-id><pub-id pub-id-type="medline">30347470</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>D&#x2019;Agostino</surname><given-names>RB</given-names>  <suffix>Sr</suffix></name><name name-style="western"><surname>Vasan</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Pencina</surname><given-names>MJ</given-names> </name><etal/></person-group><article-title>General cardiovascular risk profile for use in primary care: the Framingham Heart study</article-title><source>Circulation</source><year>2008</year><month>02</month><day>12</day><volume>117</volume><issue>6</issue><fpage>743</fpage><lpage>753</lpage><pub-id pub-id-type="doi">10.1161/CIRCULATIONAHA.107.699579</pub-id><pub-id pub-id-type="medline">18212285</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goff</surname><given-names>DC</given-names>  <suffix>Jr</suffix></name><name name-style="western"><surname>Lloyd-Jones</surname><given-names>DM</given-names> </name><name 
name-style="western"><surname>Bennett</surname><given-names>G</given-names> </name><etal/></person-group><article-title>2013 ACC/AHA guideline on the assessment of cardiovascular risk: a report of the American College of Cardiology/American Heart Association Task Force on Practice Guidelines</article-title><source>Circulation</source><year>2014</year><month>06</month><day>24</day><volume>129</volume><issue>25 Suppl 2</issue><fpage>S49</fpage><lpage>73</lpage><pub-id pub-id-type="doi">10.1161/01.cir.0000437741.48606.98</pub-id><pub-id pub-id-type="medline">24222018</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yin</surname><given-names>H</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>X</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>High perceived stress may shorten activated partial thromboplastin time and lead to worse clinical outcomes in patients with coronary heart disease</article-title><source>Front Cardiovasc Med</source><year>2021</year><volume>8</volume><fpage>769857</fpage><pub-id pub-id-type="doi">10.3389/fcvm.2021.769857</pub-id><pub-id pub-id-type="medline">34912866</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Detailed data extraction and quality-control protocols.</p><media xlink:href="medinform_v14i1e80809_app1.docx" xlink:title="DOCX File, 18 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Grid search ranges and final hyperparameters for all machine learning models.</p><media xlink:href="medinform_v14i1e80809_app2.docx" xlink:title="DOCX File, 18 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Threshold 
optimization curve for the light gradient boosting machine (LightGBM) model.</p><media xlink:href="medinform_v14i1e80809_app3.pdf" xlink:title="PDF File, 223 KB"/></supplementary-material></app-group></back></article>