<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e76659</article-id><article-id pub-id-type="doi">10.2196/76659</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Estimating 10-Year Cardiovascular Disease Risk in Primary Prevention Using UK Electronic Health Records and a Hybrid Multitask BERT Model: Retrospective Cohort Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Liu</surname><given-names>Tianyi</given-names></name><degrees>BSc, MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Lu</surname><given-names>Lei</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Yanzhong</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" 
rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Krentz</surname><given-names>Andrew J</given-names></name><degrees>ChB, MD, Prof Dr</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Curcin</surname><given-names>Vasa</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Population Health Sciences, School of Life Course &#x0026; Population Sciences, King's College London</institution><addr-line>Addison House, Guy&#x2019;s Campus</addr-line><addr-line>London</addr-line><country>United Kingdom</country></aff><aff id="aff2"><institution>Metadvice</institution><addr-line>London</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Manlhiot</surname><given-names>Cedric</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Dong</surname><given-names>Jing</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Tianyi Liu, BSc, MSc, Department of Population Health Sciences, School of Life Course &#x0026; Population Sciences, King's College London, Addison House, Guy&#x2019;s Campus, London, SE1 1UL, United Kingdom, 44 07422940311; <email>tianyi.4.liu@kcl.ac.uk</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>13</day><month>11</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e76659</elocation-id><history><date 
date-type="received"><day>28</day><month>04</month><year>2025</year></date><date date-type="rev-recd"><day>14</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>16</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Tianyi Liu, Lei Lu, Yanzhong Wang, Andrew J Krentz, Vasa Curcin. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 13.11.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e76659"/><abstract><sec><title>Background</title><p>Cardiovascular disease (CVD) remains a leading cause of preventable morbidity and mortality, highlighting the need for early risk stratification in primary prevention. Traditional Cox models assume proportional hazards and linear effects, limiting flexibility. While machine learning offers greater expressiveness, many models rely solely on structured data and overlook time-to-event (TTE) information. 
Integrating structured and textual representations may enhance prediction and support equitable assessment across clinical subgroups.</p></sec><sec><title>Objective</title><p>This study aims to develop a hybrid multitask deep learning model (MT-BERT [multitask Bidirectional Encoder Representations from Transformers]) integrating structured and textual features from electronic health records (EHRs) to predict 10-year CVD risk, enhancing individualized stratification and supporting equitable assessment across diverse demographic groups.</p></sec><sec sec-type="methods"><title>Methods</title><p>We used data from Clinical Practice Research Datalink (CPRD) Aurum comprising 469,496 patients aged 40&#x2010;85 years to develop MT-BERT for 10-year CVD risk prediction. Structured EHR variables and their corresponding textual representations were jointly encoded using a multilayer perceptron and a distilled version of the BERT model (DistilBERT), respectively. A fusion layer and stacked multihead attention modules enabled cross-modal interaction modeling. The model generated both binary classification outputs and TTE risk scores, optimized using a custom FocalCoxLoss function with uncertainty-based weighting. Prediction targets encompassed composite and individual CVD outcomes. Model performance was evaluated using the area under the receiver operating characteristic curve (AUROC), concordance index, and Brier score, with subgroup analyses by ethnicity and deprivation, and heterogeneity assessed using Higgins <italic>I</italic>&#x00B2; and Cochran Q statistics. 
Generalizability was assessed via external validation in a held-out London cohort.</p></sec><sec sec-type="results"><title>Results</title><p>The MT-BERT model yielded AUROC values of 0.744 (95% CI 0.738&#x2010;0.749) in males and 0.782 (95% CI 0.768&#x2010;0.796) in females on the test set (n=711,052), and 0.736 (95% CI 0.729&#x2010;0.741) and 0.775 (95% CI 0.768&#x2010;0.780), respectively in &#x201C;spatial external&#x201D; validation (n=144,370). Brier scores were 0.130 in males and 0.091 in females. Individuals classified as high-risk (&#x2265;40% risk in males and &#x2265;34% in females) demonstrated significantly reduced 10-year event-free survival relative to lower-risk individuals (log-rank <italic>P</italic>&#x003C;.001). Model performance was consistently higher in females across all metrics. Subgroup analyses revealed substantial heterogeneity across ethnicity and deprivation (<italic>I</italic>&#x00B2;&#x003E;70%), especially among males, with lower AUROC in South Asian and Black ethnic groups. These findings reflect variation in model performance across demographic groups while supporting its applicability to large-scale CVD risk stratification.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The proposed hybrid MT-BERT model predicts 10-year CVD risk for primary prevention by integrating structured variables and unstructured clinical text from EHRs. Its multitask design facilitates both individualized risk stratification and TTE estimation. 
While performance was modestly reduced in deprived and minority ethnic subgroups, these findings provide preliminary support for advancing equity-aware, data-driven prevention strategies in increasingly diverse health care settings.</p></sec></abstract><kwd-group><kwd>cardiovascular diseases</kwd><kwd>risk prediction</kwd><kwd>deep learning</kwd><kwd>electronic health records</kwd><kwd>health equity</kwd><kwd>survival analysis</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Cardiovascular disease (CVD) is a major public health challenge, accounting for 1 in 4 deaths in the United Kingdom and affecting 7.6 million individuals. In 2022, over 100,000 hospital admissions were attributed to acute coronary syndromes [<xref ref-type="bibr" rid="ref1">1</xref>]. Beyond morbidity and mortality, CVD imposes US $15 billion in annual health care costs and US $35 billion in economic losses. Additionally, 6&#x2010;8 million individuals have undiagnosed or uncontrolled hypertension, further increasing preventable CVD-related complications [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>].</p><p>To address this, the United Kingdom has implemented national prevention strategies, with the UK&#x2019;s National Institute for Health and Care Excellence (NICE) guidelines [<xref ref-type="bibr" rid="ref3">3</xref>] recommending CVD risk assessments for individuals aged &#x2265;40 years every 5 years via general practitioners (GPs). These assessments rely on structured clinical data and established 10-year risk prediction models such as QRISK3 [<xref ref-type="bibr" rid="ref4">4</xref>] (developed and validated in UK populations) and Pooled Cohort Equations [<xref ref-type="bibr" rid="ref5">5</xref>] (developed in US cohorts). 
Despite their widespread use, these conventional models rely on Cox regression, whose linear and proportional assumptions hinder their ability to model complex risk interactions, thereby limiting predictive performance in clinical practice [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Significant disparities in CVD burden persist. For instance, South Asians in the United Kingdom have nearly twice the coronary heart disease (CHD) incidence of White Europeans, with South Asian females experiencing rates similar to White European males [<xref ref-type="bibr" rid="ref8">8</xref>]. Black African and Caribbean individuals do not show an elevated risk but receive fewer preventive treatments despite clinical indications [<xref ref-type="bibr" rid="ref9">9</xref>]. Socioeconomic deprivation further exacerbates disparities, with higher CVD risk factor prevalence, hospital admissions, lower treatment uptake, and twice the mortality rate for individuals younger than 75 years in the most deprived areas [<xref ref-type="bibr" rid="ref10">10</xref>]. Although conventional models can incorporate subgroup characteristics, they often lack the flexibility to model the complex, interdependent factors contributing to these disparities [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Machine learning (ML) approaches have emerged as promising alternatives to traditional CVD risk estimation models. A range of ML methods, from logistic regression and decision trees to ensemble techniques such as boosting, bagging, and stacking, have been explored for CVD risk prediction [<xref ref-type="bibr" rid="ref6">6</xref>]. However, particularly in the context of primary prevention, many existing ML-based algorithms are limited by relatively small sample sizes, heterogeneous data quality, and a lack of consistent external validation, which may restrict their clinical applicability. 
Moreover, while some recent ML-based survival models, such as DeepSurv (Deep Neural Network&#x2013;based Cox Model) [<xref ref-type="bibr" rid="ref12">12</xref>], incorporate time-to-event (TTE) information, many CVD risk prediction algorithms continue to treat outcomes as binary events, without explicitly addressing censoring or time-dependent risk [<xref ref-type="bibr" rid="ref7">7</xref>]. In addition, most models primarily use structured clinical variables and often overlook unstructured sources, such as physician notes and patient-reported symptoms, that could provide important contextual information for risk stratification [<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>Natural language processing (NLP), particularly Transformer-based models such as BERT [<xref ref-type="bibr" rid="ref14">14</xref>] (Bidirectional Encoder Representations from Transformers), presents a novel approach to integrating unstructured clinical text into CVD risk prediction. Transformers use self-attention mechanisms to model long-range dependencies and complex relationships between variables. As an encoder-based Transformer model, BERT generates dense, context-rich embeddings by capturing bidirectional contextual information. Pretrained on large-scale corpora through transfer learning, BERT can effectively extract meaningful features from clinical text. Given that patient comorbidities, lifestyle factors, and social determinants are often recorded in unstructured formats, NLP models such as BERT can bridge critical gaps in cardiovascular risk assessment.</p><p>With increasing digital health adoption, NLP-driven models offer a scalable and cost-effective approach to CVD risk stratification. In particular, pretrained Transformer-based architectures such as BERT enable efficient fine-tuning on domain-specific datasets without the need for extensive retraining, reducing resource demands for clinical implementation. 
By integrating structured electronic health record (EHR) data with unstructured text sources such as patient interactions, chatbots, and clinical notes, future models could further enhance risk prediction and address limitations of conventional approaches. In this study, we emulate such integration by transforming structured variables into textual representations, providing an initial step toward leveraging both data modalities. Transformer-based models can better capture complex risk patterns across diverse populations, supporting more timely and equitable cardiovascular risk assessment for primary prevention.</p></sec><sec id="s1-2"><title>Rationale and Objectives</title><p>This study aims to develop a hybrid Multitask BERT model (MT-BERT) for prognostic CVD risk assessment, integrating structured and textual representations derived from UK EHR data using phenotyping techniques. The model is designed to support primary care decision-making by predicting 10-year CVD risk to guide early intervention, statin initiation, and lifestyle modification, potentially enhancing or replacing conventional statistical approaches.</p><p>While traditional survival models rely primarily on structured clinical variables, the proposed MT-BERT approach incorporates unstructured information (eg, physician notes and comorbidity narratives) to improve predictive performance, particularly among underrepresented high-risk populations.</p><p>To achieve this, we develop a model for both CVD outcome classification and TTE hazard ranking, enhancing individualized risk stratification and addressing biases in structured datasets. We also introduce a novel FocalCoxLoss function, enabling uncertainty-weighted joint optimization of classification and survival objectives. The study emphasizes model development, feature engineering, and optimization and includes external validation using data from a distinct London region to assess spatial generalizability. 
Additionally, we leverage longitudinal EHR data to better support practical implementation in primary care.</p><p>The specific objectives of this study were as follows:</p><list list-type="order"><list-item><p>Extract structured and unstructured predictors and CVD outcomes (binary classification and TTE data) from Clinical Practice Research Datalink (CPRD) Aurum [<xref ref-type="bibr" rid="ref15">15</xref>] via phenotyping.</p></list-item><list-item><p>Develop the MT-BERT model integrating textual and structured variables for CVD risk prediction, incorporating the novel FocalCoxLoss for multitask learning.</p></list-item><list-item><p>Evaluate model performance across individual and composite CVD outcomes, with subgroup analyses by ethnicity and socioeconomic deprivation, focusing on discrimination, calibration, and fairness metrics.</p></list-item><list-item><p>Ensure transparent reporting according to the TRIPOD+AI (Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis + artificial intelligence) statement [<xref ref-type="bibr" rid="ref16">16</xref>], including detailed documentation of feature engineering, data harmonization, and hyperparameter optimization.</p></list-item></list><p>The aim of this study was to evaluate the effectiveness of multimodal deep learning for long-term CVD risk prediction and to advance personalized health care through the integration of NLP and structured data from UK EHR.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>This study follows the guidelines outlined in the TRIPOD+AI [<xref ref-type="bibr" rid="ref16">16</xref>] statement (<xref ref-type="supplementary-material" rid="app3">Checklist 1</xref>), an extension of the TRIPOD statement specifically designed for AI (including ML) prediction models.</p></sec><sec id="s2-2"><title>Study Design and Patient Records Inclusion</title><p>This study uses a retrospective EHR-based design, leveraging 
CPRD Aurum [<xref ref-type="bibr" rid="ref15">15</xref>] (study registration protocol no.: 21_000346) to develop NLP-driven CVD risk prediction models using longitudinal patient records. CPRD Aurum is a routinely updated UK primary care database integrating deidentified patient records from 1359 GP practices (primary care sites), covering 16.6% of UK GPs and representing one-fifth of the UK population. It includes demographic data, diagnoses, symptoms, prescriptions, and dosages, standardized via MedCode ID to ensure interoperability with Read Code and SNOMED CT. As of January 2022 (version 2.7), it comprises over 40 million research-eligible patients with a median follow-up of 8.74 years (IQR 3.22-19.87).</p><p>The &#x201C;External&#x201D; (London) validation set represents a &#x201C;spatial&#x201D; external cohort from CPRD practices located in Greater London, selected to ensure demographic diversity, particularly higher proportions of ethnic minority patients.</p><p>The data preprocessing workflow followed our prior work [<xref ref-type="bibr" rid="ref17">17</xref>]. As shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>, we selected a stratified random sample of 469,496 patients aged 40 to 85 years who were registered between 2011 and 2021. Unique CPRD identifiers were used to link records and reconstruct longitudinal health trajectories, ensuring a representative UK primary care cohort. Patients with at least one 10-year follow-up period were included to verify consistency in registration and clinical event timestamps. Follow-up intervals began at ages 40 to 75 years in 5-year increments, allowing individuals to contribute multiple independent 10-year records if they were CVD-free at entry. To emulate clinical practice, we extracted multiple fixed-horizon prediction episodes for each patient, reflecting periodic risk assessments recommended in the UK NICE guidelines [<xref ref-type="bibr" rid="ref3">3</xref>] for adults aged 40 to 85 years. 
Consistent with our prior ML-based survival study, all baseline models were trained in exactly the same pipeline with identical predictors, censoring rules, preprocessing, and the same development and validation splits. This setup enables like-for-like comparisons and effectively represents structure-only, single-objective ablation settings against which MT-BERT is evaluated. While the guidelines do not specify data extraction procedures, this design strategy ensures alignment with real-world primary prevention workflows and enhances model relevance for general practice.</p><p>Given the scale and representativeness of CPRD Aurum, sample size adequacy was evaluated according to the framework proposed by Riley et al [<xref ref-type="bibr" rid="ref17">17</xref>] for clinical prediction model development. The cohort substantially exceeded recommended thresholds for minimum events per predictor, enabling robust model development, rare event estimation, and stratified subgroup analyses.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Workflow for study population and records extraction. CPRD: Clinical Practice Research Datalink; CVD: cardiovascular disease.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e76659_fig01.png"/></fig></sec><sec id="s2-3"><title>Extraction and Textual Transformation of Predictors</title><p>We extracted all predictor variables at the cohort entry year, primarily based on QRISK3 definitions, with minor adaptations where appropriate (<xref ref-type="table" rid="table1">Table 1</xref>). 
Phenotypes were defined using the HDRUK Phenotype Library [<xref ref-type="bibr" rid="ref18">18</xref>], while comorbidities were identified through CArdiovascular disease research using LInked BEspoke studies and Electronic health Records (CALIBER, developed by the University College London Institute of Health Informatics [CALIBER research team]) [<xref ref-type="bibr" rid="ref19">19</xref>], with corresponding <italic>ICD-10</italic> (<italic>International Classification of Diseases, Tenth Revision</italic>) codes detailed in Table S1 (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). MedCode IDs were documented and mapped to controlled terminologies to ensure consistency, facilitate updates, and enhance transparency (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p><p>Static variables (eg, gender, ethnicity, and deprivation) were assigned based on the most recent record available. Time-dependent variables (eg, smoking status and medication use) were determined within a 6-month window around the entry year; in the absence of recent records, smoking status was inferred from prior history, and medication use was classified based on any previous prescription. Comorbidities were considered present if recorded before cohort entry within the same time frame. 
Absence of records was interpreted as no occurrence rather than missing data.</p><p>For continuous variables (eg, BMI, systolic blood pressure, and total cholesterol/high-density lipoprotein ratio), if no values were available within the acceptable window, missing data were imputed using linear regression, provided that at least 3 prior measurements existed within a 2-year span.</p><p>To integrate established risk assessment into ML models, we derived an additional predictor based on the QRISK score calculated by an R package [<xref ref-type="bibr" rid="ref20">20</xref>], represented in categorical form, aligned with UK clinical thresholds for statin treatment initiation.</p><p>Unlike traditional models based on structured data, this study transforms structured predictors into text representations for NLP-based modeling. To implement this approach, categorical variables including risk groups, ethnicity, smoking status, and comorbidities were converted into textual descriptions reflecting patient status at cohort entry, avoiding numerical encoding. Age and BMI were also expressed in textual form. Continuous variables were otherwise retained in their original form. 
The textual transformation template is provided in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Overview of predictors, outcome definitions, and preprocessing approaches.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">Textual transformation/Preprocessing description</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Continuous predictors</td></tr><tr><td align="left" valign="top">Age</td><td align="left" valign="top"><italic>&#x201C;The patient is a {age_category}&#x201D;</italic>, where age &#x003C; 45=young adult, 45&#x2010;59=middle-aged adult, 60&#x2010;74=young-old adult, and age &#x2265; 75=older adult</td></tr><tr><td align="left" valign="top">BMI (kg/m&#x00B2;)</td><td align="left" valign="top">Appended as: <italic>&#x201C;with a BMI classified as {bmi_category}&#x201D;</italic>, where BMI &#x003C;18.5=underweight, 18.5&#x2010;24.9=normal weight, 25.0&#x2010;29.9=overweight (preobesity), 30.0&#x2010;34.9=Class 1 obesity, 35.0&#x2010;39.9=Class 2 obesity, BMI &#x2265;40.0=Class 3 obesity (severe obesity)</td></tr><tr><td align="left" valign="top">Systolic blood pressure (mmHg)</td><td align="left" valign="top">Normalized to [0, 1] range using min-max scaling</td></tr><tr><td align="left" valign="top">SD of systolic blood pressure</td><td align="left" valign="top">Normalized to [0, 1] range using min-max scaling</td></tr><tr><td align="left" valign="top">Diastolic blood pressure (mmHg)</td><td align="left" valign="top">Normalized to [0, 1] range using min-max scaling</td></tr><tr><td align="left" valign="top">Total/High-density lipoprotein ratio</td><td align="left" valign="top">Normalized to [0, 1] range using min-max scaling</td></tr><tr><td align="left" valign="top">Townsend score</td><td align="left" valign="top">Normalized to [0, 1] range using min-max 
scaling</td></tr><tr><td align="left" valign="top" colspan="2">Categorical predictors</td></tr><tr><td align="left" valign="top">Ethnicity (9 categories)</td><td align="left" valign="top">Appended as: <italic>&#x201C;and is of {ethnicity_group} ethnicity&#x201D;</italic></td></tr><tr><td align="left" valign="top">Smoking status (5 categories)</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient is a {smoking_status}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Prestratified risk group (4 categories)</td><td align="left" valign="top">Appended as: <italic>&#x201C;Pre-stratified risk indicating a {risk_group}&#x201D;</italic>, where 0%&#x2010;4.9%=low risk, 5.0%&#x2010;9.9%=moderate risk, 10.0%&#x2010;19.9%=high risk, and &#x2265;20.0%=extreme high risk</td></tr><tr><td align="left" valign="top" colspan="2">Binary comorbidity predictors</td></tr><tr><td align="left" valign="top">Type 1/2 diabetes mellitus</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Chronic kidney disease stage 3, 4, or 5</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Family history of coronary heart disease</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Atrial fibrillation</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Erectile dysfunction</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" 
valign="top">HIV/AIDS</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Migraine</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Rheumatoid arthritis</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Systemic lupus erythematosus</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Severe mental illness</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Antipsychotic</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Corticosteroid</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top">Treated hypertension</td><td align="left" valign="top">Appended as: <italic>&#x201C;The patient has a history of {condition_1, condition_2, &#x2026;}&#x201D;</italic></td></tr><tr><td align="left" valign="top" colspan="2">Cardiovascular disease outcomes</td></tr><tr><td align="left" valign="top">Coronary heart disease</td><td align="left" valign="top">Used as binary outcome labels indicating disease presence (1=event occurred, 0=no event). 
Corresponding time-to-event (TTE) values were recorded in days, with a maximum follow-up of 3650 days for censored cases</td></tr><tr><td align="left" valign="top">Myocardial infarction</td><td align="left" valign="top">Used as binary outcome labels indicating disease presence (1=event occurred, 0=no event). Corresponding TTE values were recorded in days, with a maximum follow-up of 3650 days for censored cases</td></tr><tr><td align="left" valign="top">Ischemic stroke</td><td align="left" valign="top">Used as binary outcome labels indicating disease presence (1=event occurred, 0=no event). Corresponding TTE values were recorded in days, with a maximum follow-up of 3650 days for censored cases</td></tr><tr><td align="left" valign="top">Heart failure</td><td align="left" valign="top">Used as binary outcome labels indicating disease presence (1=event occurred, 0=no event). Corresponding TTE values were recorded in days, with a maximum follow-up of 3650 days for censored cases</td></tr><tr><td align="left" valign="top">Angina</td><td align="left" valign="top">Used as binary outcome labels indicating disease presence (1=event occurred, 0=no event). Corresponding TTE values were recorded in days, with a maximum follow-up of 3650 days for censored cases</td></tr><tr><td align="left" valign="top">Cardiovascular disease (QRISK)</td><td align="left" valign="top">A composite outcome including coronary heart disease (CHD), stroke, and transient ischemic attack (TIA). TTE was defined as the time to the earliest occurring event, in days</td></tr><tr><td align="left" valign="top">Cardiovascular disease (Composite)</td><td align="left" valign="top">A composite outcome including the above cardiovascular conditions, plus abdominal aortic aneurysm (AAA), and peripheral artery disease (PAD). 
TTE was defined as the time to the earliest occurring event, in days</td></tr><tr><td align="left" valign="top">Vascular dementia</td><td align="left" valign="top">Used as a binary outcome label indicating disease presence (1=event occurred, 0=no event). TTE was recorded in days, with a maximum follow-up of 3650 days for censored cases</td></tr></tbody></table></table-wrap></sec><sec id="s2-4"><title>CVD Outcome and TTE Specification</title><p>Each CVD outcome (<xref ref-type="table" rid="table1">Table 1</xref>) was defined using phenotype-based criteria and assessed individually with its corresponding TTE, including CHD (<italic>ICD-10</italic>: I20-I25), stroke (I63-I66, I69), transient ischemic attack (G45-G46), heart failure (I50), abdominal aortic aneurysm (I71), and peripheral arterial disease (I73-I74). Additionally, 2 composite CVD outcomes, one aligned with QRISK and one containing all of the above, were defined based on the earliest occurrence of any included CVD event. Although not a classical CVD outcome, vascular dementia (F01) was included as an exploratory end point due to its close association with cerebrovascular disease. CVD events were recorded if they occurred within the 10-year follow-up or up to 6 months thereafter. Absence of records was interpreted as no event rather than missing data. 
The definition and mapping of CVD outcomes are detailed in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec><sec id="s2-5"><title>Model Architecture and Training Strategy</title><p>As shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>, we developed an MT-BERT framework for the prediction of CVD outcomes, which integrates a distilled version of the BERT model (DistilBERT) [<xref ref-type="bibr" rid="ref21">21</xref>] for textual features and a multilayer perceptron (MLP) for structured clinical predictors, incorporating multihead attention and residual connections to refine feature interactions. DistilBERT retains representational power while reducing computational complexity by removing token-type embeddings and the pooler layer. Unlike conventional concatenation-based approaches, our model applies a transformation layer for feature alignment before passing the fused representation through stacked attention layers, enabling dynamic cross-modal refinement. The MLP module, implemented in PyTorch, consists of fully connected layers with Rectified Linear Unit activations and dropout, ensuring structured feature regularization. For prediction, the model simultaneously outputs binary classification logits (CVD presence) and a log hazard score (TTE) for survival analysis, with temperature scaling improving classification stability. Full implementation details are provided in Table S3 (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>Structured features were MinMax scaled, and text data were tokenized using the DistilBERT tokenizer. The dataset was split in a 7:2:1 ratio into training, validation, and testing sets (<xref ref-type="fig" rid="figure1">Figure 1</xref>) using stratified sampling to maintain class balance. 
Labels and TTE values were converted into tensors for model compatibility. Class imbalance was addressed with a dynamically adjusted weighting scheme, scaling standard balanced class weights based on the observed event rate. The processed data were structured into TensorDatasets and loaded for training.</p><p>Our model uses a customized FocalCoxLoss, a multitask loss function that combines Focal Loss, a class imbalance-aware variant of cross-entropy loss, for event classification, which optimizes decision boundaries, and Cox Loss, as used in DeepSurv [<xref ref-type="bibr" rid="ref12">12</xref>], for survival analysis, which optimizes risk ranking. The 2 components are balanced using learnable uncertainty-based weighting [<xref ref-type="bibr" rid="ref22">22</xref>] as follows:</p><p><inline-formula><mml:math id="ieqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mtext mathvariant="fraktur">&#x00A0;</mml:mtext></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mtext mathvariant="fraktur">&#x00A0;</mml:mtext></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mn>2</mml:mn><mml:msubsup><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow></mml:mfrac><mml:mo>)</mml:mo></mml:mrow><mml:msub><mml:mrow><mml:mi 
mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mrow><mml:mtext>&#x00A0;</mml:mtext></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mn>2</mml:mn><mml:msubsup><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow></mml:mfrac><mml:mo>)</mml:mo></mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>where <inline-formula><mml:math id="ieqn2"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn3"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are learnable task-specific variance parameters. 
Unlike fixed-weight loss functions, this approach enables dynamic adjustment of task contributions by assigning higher effective weights to tasks with lower predicted uncertainty, thereby optimizing the trade-off between binary CVD event prediction and TTE risk estimation during training.</p><p>To ensure stability, we adopted a 2-stage training strategy. Initially, the task-specific variance parameters (&#x03C3;<sub>focal</sub> and &#x03C3;<sub>cox</sub>) were frozen, maintaining a fixed loss ratio to prevent early instability. After 10 epochs, they were unfrozen, allowing adaptive loss scaling. DistilBERT remained trainable throughout, enabling concurrent optimization of textual and structured representations.</p><p>Training used AdamW with cosine annealing, gradient clipping, and early stopping for stable convergence. Dropout rates were optimized via Optuna hyperparameter tuning, while learning rate (initially 3e-4) and weight decay (1e-4) were manually set. During training, the learning rate was dynamically adjusted using a cosine annealing schedule with warm restarts. Model selection was based on the area under the curve and C-statistic maximization.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Architecture of the Multitask Bidirectional Encoder Representations from Transformers model for cardiovascular disease risk prediction. HDL: high-density lipoprotein; MLP: multilayer perceptron; SBP: systolic blood pressure; TTE: time-to-event.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e76659_fig02.png"/></fig></sec><sec id="s2-6"><title>Performance Evaluation, Generalizability, and Fairness</title><p>Model performance was assessed on the held-out test set using the optimal hyperparameters identified during training (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Calibration was performed via isotonic regression and assessed using Brier scores and calibration plots across risk deciles. 
Discrimination was quantified by the AUROC and the C-statistic, with 95% CIs estimated via 1000 bootstrap resamples. Internal validity was assessed through 10-fold cross-validation. Classification metrics, including accuracy, recall, and specificity, were reported at the optimized threshold. Kaplan-Meier (KM) survival curves stratified by predicted risk groups were used to evaluate risk separation and clinical utility.</p><p>To evaluate generalizability and fairness, we performed &#x201C;spatial external&#x201D; validation using a London held-out cohort (<xref ref-type="fig" rid="figure1">Figure 1</xref>), selected for its higher proportion of ethnic minority patients. AUROC with 95% CIs was computed across all 9 ethnicity categories and Townsend deprivation deciles. In parallel, we reported aggregated subgroup results for broader ethnicity groupings including White, South Asian (Indian, Pakistani, and Bangladeshi), and Black (African and Caribbean). We further evaluated performance variation across individual Townsend quintiles. To quantify heterogeneity in model performance, Higgins <italic>I</italic>&#x00B2; statistic and Cochran Q values [<xref ref-type="bibr" rid="ref23">23</xref>] were computed across ethnicity and deprivation groups, where the Higgins <italic>I</italic>&#x00B2; statistic quantifies the percentage of total variation across studies due to heterogeneity rather than chance, and the Q value tests whether the observed differences in study results are due to chance or true heterogeneity.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study uses CPRD Aurum data under protocol number 21_000346, which was reviewed and approved by the Independent Scientific Advisory Committee (ISAC) on behalf of the Medicines and Healthcare Products Regulatory Agency. The study relies on anonymized patient records and follows ethical guidelines for research using deidentified health care data. 
No patient or public consent was required, as the research was conducted using fully anonymized data. There was no direct patient or public involvement in the study&#x2019;s design, execution, analysis, or dissemination. No financial compensation was provided.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Baseline Characteristics of Study Population</title><p>We extracted 909,848 records from 469,496 patients across 1476 practices in England (<xref ref-type="fig" rid="figure1">Figure 1</xref>). After quality control to remove records with extreme values for continuous features, 855,422 valid records were retained. Of these, 144,370 records (16.9%) from practices located in London were held out as the spatial external validation set. The remaining 711,052 records (83.1%) constituted the development dataset, which was randomly split into training, validation, and testing sets using a 70:15:15 ratio.</p><p><xref ref-type="table" rid="table2">Table 2</xref> summarizes the background characteristics of both the development dataset and the spatial validation set, disaggregated by gender. The statistics show that most comorbidities and CVD outcomes are significantly more common among males, which makes gender-specific modeling indispensable for this task. In addition, a substantially higher proportion of individuals from minority ethnic backgrounds (approximately one-third) is observed in the spatial validation set, whereas over 90% of the development dataset is White. 
This demographic contrast supports the use of the London practice cohort as a spatially distinct external validation set, particularly for assessing model generalizability and fairness in managing CVD risk among ethnic minority populations.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Baseline characteristics of the study population by gender.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics</td><td align="left" valign="bottom" colspan="2">Male (n=403,545, 47.17%)</td><td align="left" valign="bottom" colspan="2">Female (n=451,877, 52.83%)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Development<break/>(n=335,891)</td><td align="left" valign="top">Spatial validation<break/>(n=67,654)</td><td align="left" valign="top">Development<break/>(n=375,161)</td><td align="left" valign="top">Spatial validation<break/>(n=76,716)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="5">Continuous features, mean (SD)</td></tr><tr><td align="left" valign="top">Age</td><td align="left" valign="top">50.62 (9.16)</td><td align="left" valign="top">50.51 (9.10)</td><td align="left" valign="top">51.29 (9.51)</td><td align="left" valign="top">51.13 (9.43)</td></tr><tr><td align="left" valign="top">BMI</td><td align="left" valign="top">27.45 (4.42)</td><td align="left" valign="top">27.25 (4.31)</td><td align="left" valign="top">27.18 (5.47)</td><td align="left" valign="top">26.54 (5.30)</td></tr><tr><td align="left" valign="top">Systolic blood pressure</td><td align="left" valign="top">135.17 (12.30)</td><td align="left" valign="top">134.79 (12.02)</td><td align="left" valign="top">131.32 (14.83)</td><td align="left" valign="top">129.63 (14.88)</td></tr><tr><td align="left" valign="top">SD of systolic blood pressure</td><td align="left" valign="top">9.05 (4.83)</td><td align="left" valign="top">8.93 (4.88)</td><td align="left" valign="top">9.87 
(4.88)</td><td align="left" valign="top">9.50 (4.79)</td></tr><tr><td align="left" valign="top">Diastolic blood pressure</td><td align="left" valign="top">81.82 (7.42)</td><td align="left" valign="top">81.75 (7.33)</td><td align="left" valign="top">79.18 (7.93)</td><td align="left" valign="top">78.47 (7.93)</td></tr><tr><td align="left" valign="top">Total/High-density lipoprotein ratio</td><td align="left" valign="top">4.08 (0.83)</td><td align="left" valign="top">4.20 (0.84)</td><td align="left" valign="top">4.11 (0.84)</td><td align="left" valign="top">4.19 (0.85)</td></tr><tr><td align="left" valign="top" colspan="5">Categorical features, %</td></tr><tr><td align="left" valign="top">Ethnicity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>White</td><td align="left" valign="top">93.44</td><td align="left" valign="top">64.82</td><td align="left" valign="top">93.92</td><td align="left" valign="top">65.65</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indian</td><td align="left" valign="top">1.50</td><td align="left" valign="top">8.33</td><td align="left" valign="top">1.46</td><td align="left" valign="top">8.32</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Pakistani</td><td align="left" valign="top">1.02</td><td align="left" valign="top">2.05</td><td align="left" valign="top">0.91</td><td align="left" valign="top">1.62</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bangladeshi</td><td align="left" valign="top">0.26</td><td align="left" valign="top">1.35</td><td align="left" valign="top">0.22</td><td align="left" 
valign="top">1.18</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chinese</td><td align="left" valign="top">0.23</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.28</td><td align="left" valign="top">1.04</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other Asian</td><td align="left" valign="top">0.50</td><td align="left" valign="top">4.40</td><td align="left" valign="top">0.47</td><td align="left" valign="top">4.28</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Black Caribbean</td><td align="left" valign="top">0.50</td><td align="left" valign="top">5.32</td><td align="left" valign="top">0.58</td><td align="left" valign="top">6.57</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Black African</td><td align="left" valign="top">0.35</td><td align="left" valign="top">6.73</td><td align="left" valign="top">0.30</td><td align="left" valign="top">6.10</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other ethnic group</td><td align="left" valign="top">2.20</td><td align="left" valign="top">6.04</td><td align="left" valign="top">1.84</td><td align="left" valign="top">5.24</td></tr><tr><td align="left" valign="top">Smoking status</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Nonsmoker</td><td align="left" valign="top">45.73</td><td align="left" valign="top">47.10</td><td align="left" valign="top">58.42</td><td align="left" 
valign="top">59.71</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ex-smoker</td><td align="left" valign="top">51.73</td><td align="left" valign="top">50.71</td><td align="left" valign="top">39.40</td><td align="left" valign="top">38.50</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Light smoker</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.47</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.63</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moderate smoker</td><td align="left" valign="top">1.17</td><td align="left" valign="top">1.04</td><td align="left" valign="top">0.99</td><td align="left" valign="top">0.80</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heavy smoker</td><td align="left" valign="top">0.76</td><td align="left" valign="top">0.67</td><td align="left" valign="top">0.44</td><td align="left" valign="top">0.35</td></tr><tr><td align="left" valign="top">Risk group</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Low risk</td><td align="left" valign="top">41.97</td><td align="left" valign="top">44.54</td><td align="left" valign="top">57.11</td><td align="left" valign="top">61.42</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moderate risk</td><td align="left" valign="top">26.67</td><td align="left" valign="top">26.74</td><td align="left" valign="top">22.59</td><td align="left" 
valign="top">21.51</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High risk</td><td align="left" valign="top">21.63</td><td align="left" valign="top">20.53</td><td align="left" valign="top">14.98</td><td align="left" valign="top">13.07</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Extreme high risk</td><td align="left" valign="top">9.73</td><td align="left" valign="top">8.18</td><td align="left" valign="top">5.32</td><td align="left" valign="top">3.99</td></tr><tr><td align="left" valign="top" colspan="5">Binary comorbidity, %</td></tr><tr><td align="left" valign="top">Type 1 and 2 diabetes mellitus</td><td align="left" valign="top">4.75</td><td align="left" valign="top">3.82</td><td align="left" valign="top">3.61</td><td align="left" valign="top">2.82</td></tr><tr><td align="left" valign="top">Chronic kidney disease stage 3, 4, or 5</td><td align="left" valign="top">0.89</td><td align="left" valign="top">0.89</td><td align="left" valign="top">1.31</td><td align="left" valign="top">1.52</td></tr><tr><td align="left" valign="top">Family history of coronary heart disease</td><td align="left" valign="top">3.72</td><td align="left" valign="top">2.72</td><td align="left" valign="top">4.80</td><td align="left" valign="top">3.23</td></tr><tr><td align="left" valign="top">Atrial fibrillation</td><td align="left" valign="top">0.80</td><td align="left" valign="top">0.92</td><td align="left" valign="top">0.45</td><td align="left" valign="top">0.46</td></tr><tr><td align="left" valign="top">Erectile dysfunction</td><td align="left" valign="top">4.12</td><td align="left" valign="top">4.25</td><td align="left" valign="top">NA</td><td align="left" valign="top">NA</td></tr><tr><td align="left" valign="top">HIV/AIDS</td><td align="left" valign="top">0.12</td><td align="left" valign="top">0.05</td><td align="left" 
valign="top">0.03</td><td align="left" valign="top">0.03</td></tr><tr><td align="left" valign="top">Migraine</td><td align="left" valign="top">2.36</td><td align="left" valign="top">2.91</td><td align="left" valign="top">6.40</td><td align="left" valign="top">7.17</td></tr><tr><td align="left" valign="top">Rheumatoid arthritis</td><td align="left" valign="top">0.37</td><td align="left" valign="top">0.35</td><td align="left" valign="top">0.89</td><td align="left" valign="top">0.79</td></tr><tr><td align="left" valign="top">Systemic lupus erythematosus</td><td align="left" valign="top">0.05</td><td align="left" valign="top">0.05</td><td align="left" valign="top">0.21</td><td align="left" valign="top">0.22</td></tr><tr><td align="left" valign="top">Severe mental illness</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.68</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.84</td></tr><tr><td align="left" valign="top">Antipsychotic</td><td align="left" valign="top">0.42</td><td align="left" valign="top">0.34</td><td align="left" valign="top">0.42</td><td align="left" valign="top">0.37</td></tr><tr><td align="left" valign="top">Corticosteroid</td><td align="left" valign="top">3.22</td><td align="left" valign="top">2.53</td><td align="left" valign="top">4.68</td><td align="left" valign="top">3.87</td></tr><tr><td align="left" valign="top">Treated hypertension</td><td align="left" valign="top">13.83</td><td align="left" valign="top">12.72</td><td align="left" valign="top">14.43</td><td align="left" valign="top">13.01</td></tr><tr><td align="left" valign="top" colspan="5">Cardiovascular disease outcomes, %</td></tr><tr><td align="left" valign="top">Cardiovascular disease (QRISK)</td><td align="left" valign="top">8.19</td><td align="left" valign="top">6.84</td><td align="left" valign="top">4.91</td><td align="left" valign="top">3.57</td></tr><tr><td align="left" valign="top">Cardiovascular disease (Composite)</td><td 
align="left" valign="top">10.87</td><td align="left" valign="top">9.04</td><td align="left" valign="top">6.59</td><td align="left" valign="top">4.97</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Coronary heart disease</td><td align="left" valign="top">4.80</td><td align="left" valign="top">3.65</td><td align="left" valign="top">2.47</td><td align="left" valign="top">1.52</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Myocardial infarction</td><td align="left" valign="top">2.60</td><td align="left" valign="top">2.13</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.76</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stroke</td><td align="left" valign="top">2.26</td><td align="left" valign="top">1.97</td><td align="left" valign="top">1.73</td><td align="left" valign="top">1.34</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transient ischemic attack</td><td align="left" valign="top">1.38</td><td align="left" valign="top">1.31</td><td align="left" valign="top">1.14</td><td align="left" valign="top">0.92</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Abdominal aortic aneurysm</td><td align="left" valign="top">0.46</td><td align="left" valign="top">0.37</td><td align="left" valign="top">0.08</td><td align="left" valign="top">0.09</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Peripheral artery disease</td><td align="left" valign="top">1.27</td><td align="left" valign="top">0.85</td><td align="left" valign="top">0.62</td><td align="left" valign="top">0.49</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heart failure</td><td align="left" valign="top">1.28</td><td align="left" valign="top">1.02</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.65</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Angina</td><td align="left" valign="top">3.78</td><td align="left" valign="top">3.37</td><td align="left" valign="top">2.26</td><td align="left" valign="top">1.61</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Vascular dementia</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.91</td><td align="left" valign="top">0.77</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Model Predictive Performance and Risk Stratification</title><p>We developed the MT-BERT model for multiple CVD outcomes, and all performance metrics were derived from the held-out test set unless otherwise stated (<xref ref-type="table" rid="table3">Table 3</xref>), including discrimination, calibration, and classification results for both composite and individual outcomes.</p><p>For both genders, the model trained to predict the composite CVD outcome achieved the highest discrimination. In males, the AUROC was 0.744 (95% CI 0.738&#x2010;0.749), and in females, 0.782 (95% CI 0.768&#x2010;0.796), higher than models trained on QRISK-defined CVD outcomes (males: 0.723, 95% CI 0.717&#x2010;0.732; females: 0.767, 95% CI 0.760&#x2010;0.777). 
The corresponding concordance index (C-index) values were 0.713 (95% CI 0.710&#x2010;0.716) in males and 0.732 (95% CI 0.725&#x2010;0.742) in females for composite CVD.</p><p>Among individual outcomes, ischemic stroke (males: 0.738, females: 0.769) and CHD (males: 0.732, females: 0.769) demonstrated relatively strong discriminatory performance. Angina also showed acceptable discrimination (males: 0.713, females: 0.741). In contrast, myocardial infarction exhibited lower discrimination (males: 0.679, females: 0.717), particularly in males, where recall was high but overall accuracy was reduced. Vascular dementia was additionally explored on an experimental basis and showed overall limited performance across evaluation metrics, similar to the pattern observed for myocardial infarction.</p><p>Continuous variables are presented as mean values with 2 decimal places for consistency. Individual clinical measurements (eg, blood pressure) are typically recorded as integers, but decimal values arise when reporting cohort-level averages.</p><p>&#x201C;Cardiovascular disease (QRISK)&#x201D; is a composite outcome including CHD, stroke, and transient ischemic attack. &#x201C;Cardiovascular disease (Composite)&#x201D; includes the above conditions plus abdominal aortic aneurysm and peripheral artery disease. Benchmark model results were adapted from our prior work [<xref ref-type="bibr" rid="ref24">24</xref>], where models were trained and evaluated using the same CPRD Aurum cohort and standardized preprocessing for consistent comparability. RSF (Random Survival Forest), GBSA (Gradient Boosted Survival Analysis), XGBS (Extreme Gradient Boosted Survival Model), DeepSurv, and DeepHit (Deep Neural Network for Competing Risks).</p><p>KM curves are plotted with a full 0&#x2010;1 <italic>y</italic>-axis; shaded bands denote 95% CIs. Insets provide a zoomed view of the upper survival range for readability. Segments after time points with &#x003C;10 individuals at risk are suppressed. 
High- versus low-risk groups use sex-specific thresholds (men&#x2265;40%, women&#x2265;34%). <italic>P</italic> values are from log-rank tests.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Performance metrics of the Multi-Task Bidirectional Encoder Representations from Transformers model across cardiovascular disease outcomes by gender (test set).</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Predicted outcome</td><td align="left" valign="bottom">AUROC<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> (95% CI)</td><td align="left" valign="bottom">C-index<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> (95% CI)</td><td align="left" valign="bottom">Brier score</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">Specificity</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">Baseline threshold (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="8">Male</td></tr><tr><td align="left" valign="top">CVD<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> (Composite)</td><td align="left" valign="top">0.744 (0.738- 0.749)</td><td align="left" valign="top">0.713 (0.710- 0.716)</td><td align="left" valign="top">0.130</td><td align="left" valign="top">0.823</td><td align="left" valign="top">0.874</td><td align="left" valign="top">0.405</td><td align="left" valign="top">0.40</td></tr><tr><td align="left" valign="top">CVD (QRISK)</td><td align="left" valign="top">0.723 (0.717- 0.732)</td><td align="left" valign="top">0.699 (0.696- 0.702)</td><td align="left" valign="top">0.119</td><td align="left" valign="top">0.733</td><td align="left" valign="top">0.750</td><td align="left" valign="top">0.548</td><td align="left" valign="top">0.34</td></tr><tr><td align="left" valign="top">CHD<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">0.732 
(0.726- 0.739)</td><td align="left" valign="top">0.707 (0.704- 0.709)</td><td align="left" valign="top">0.107</td><td align="left" valign="top">0.782</td><td align="left" valign="top">0.795</td><td align="left" valign="top">0.518</td><td align="left" valign="top">0.34</td></tr><tr><td align="left" valign="top">Stroke</td><td align="left" valign="top">0.738 (0.702- 0.764)</td><td align="left" valign="top">0.688 (0.684- 0.692)</td><td align="left" valign="top">0.065</td><td align="left" valign="top">0.676</td><td align="left" valign="top">0.676</td><td align="left" valign="top">0.656</td><td align="left" valign="top">0.33</td></tr><tr><td align="left" valign="top">MI<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="top">0.679 (0.670- 0.694)</td><td align="left" valign="top">0.667 (0.663- 0.674)</td><td align="left" valign="top">0.133</td><td align="left" valign="top">0.125</td><td align="left" valign="top">0.102</td><td align="left" valign="top">0.958</td><td align="left" valign="top">0.35</td></tr><tr><td align="left" valign="top">Angina</td><td align="left" valign="top">0.713 (0.702- 0.723)</td><td align="left" valign="top">0.701 (0.695- 0.706)</td><td align="left" valign="top">0.104</td><td align="left" valign="top">0.765</td><td align="left" valign="top">0.775</td><td align="left" valign="top">0.517</td><td align="left" valign="top">0.33</td></tr><tr><td align="left" valign="top" colspan="8">Female</td></tr><tr><td align="left" valign="top">CVD (Composite)</td><td align="left" valign="top">0.782 (0.768- 0.796)</td><td align="left" valign="top">0.732 (0.725- 0.742)</td><td align="left" valign="top">0.091</td><td align="left" valign="top">0.861</td><td align="left" valign="top">0.887</td><td align="left" valign="top">0.487</td><td align="left" valign="top">0.34</td></tr><tr><td align="left" valign="top">CVD (QRISK)</td><td align="left" valign="top">0.767 (0.760- 0.777)</td><td align="left" valign="top">0.752 (0.749- 
0.757)</td><td align="left" valign="top">0.077</td><td align="left" valign="top">0.914</td><td align="left" valign="top">0.9456</td><td align="left" valign="top">0.2950</td><td align="left" valign="top">0.31</td></tr><tr><td align="left" valign="top">CHD</td><td align="left" valign="top">0.769 (0.750- 0.785)</td><td align="left" valign="top">0.732 (0.725- 0.742)</td><td align="left" valign="top">0.059</td><td align="left" valign="top">0.872</td><td align="left" valign="top">0.883</td><td align="left" valign="top">0.427</td><td align="left" valign="top">0.28</td></tr><tr><td align="left" valign="top">Stroke</td><td align="left" valign="top">0.769 (0.717- 0.852)</td><td align="left" valign="top">0.737 (0.732- 0.745)</td><td align="left" valign="top">0.018</td><td align="left" valign="top">0.934</td><td align="left" valign="top">0.936</td><td align="left" valign="top">0.256</td><td align="left" valign="top">0.23</td></tr><tr><td align="left" valign="top">MI</td><td align="left" valign="top">0.717 (0.680- 0.761)</td><td align="left" valign="top">0.687 (0.682- 0.696)</td><td align="left" valign="top">0.078</td><td align="left" valign="top">0.408</td><td align="left" valign="top">0.403</td><td align="left" valign="top">0.900</td><td align="left" valign="top">0.27</td></tr><tr><td align="left" valign="top">Angina</td><td align="left" valign="top">0.741 (0.731- 0.754)</td><td align="left" valign="top">0.721 (0.718- 0.724)</td><td align="left" valign="top">0.070</td><td align="left" valign="top">0.878</td><td align="left" valign="top">0.887</td><td align="left" valign="top">0.387</td><td align="left" valign="top">0.29</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AUROC: area under the receiver operating characteristic curve.</p></fn><fn id="table3fn2"><p><sup>b</sup>C-index: concordance index.</p></fn><fn id="table3fn3"><p><sup>c</sup>&#x201C;Cardiovascular disease (QRISK)&#x201D; is a composite outcome including coronary heart disease (CHD), stroke, 
and transient ischemic attack (TIA). &#x201C;Cardiovascular disease (Composite)&#x201D; includes the above conditions plus abdominal aortic aneurysm (AAA) and peripheral artery disease (PAD).</p></fn><fn id="table3fn4"><p><sup>d</sup>CHD: coronary heart disease.</p></fn><fn id="table3fn5"><p><sup>e</sup>MI: myocardial infarction. </p></fn></table-wrap-foot></table-wrap><p>The left panel of <xref ref-type="fig" rid="figure3">Figure 3</xref> visualizes the AUROC values with 95% CIs across all CVD outcomes by gender, aligning with the summary in <xref ref-type="table" rid="table3">Table 3</xref>. The model showed the clearest advantage for composite CVD, with a higher AUROC than both QRISK-defined and individual outcome-specific models.</p><p>Across all outcomes, C-index values were consistently 0.01&#x2010;0.05 lower than AUROC. Model performance was generally higher in females across AUROC, C-index, and accuracy. The thresholds used to classify positive cases ranged from 33% to 40% in males and 23% to 34% in females, with higher thresholds observed in males, consistent with their event distribution and calibration characteristics.</p><p>In the right panel of <xref ref-type="fig" rid="figure3">Figure 3</xref>, the MT-BERT model is compared against benchmark models, including conventional approaches (QRISK3 [<xref ref-type="bibr" rid="ref4">4</xref>] and CoxPH [<xref ref-type="bibr" rid="ref25">25</xref>]), ensemble-based methods (RSF, GBSA, and XGBS), and deep learning models (DeepSurv [<xref ref-type="bibr" rid="ref12">12</xref>] and DeepHit [<xref ref-type="bibr" rid="ref26">26</xref>]). These benchmark models were implemented based on our previous work [<xref ref-type="bibr" rid="ref24">24</xref>]. 
In this evaluation, the hybrid model consistently outperformed all comparators, with the greatest AUROC gains observed for composite CVD, especially in females.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Discrimination performance of the Multitask Bidirectional Encoder Representations from Transformers model across cardiovascular disease outcomes, in comparison with benchmark conventional and machine learning models. AUROC: area under the receiver operating characteristic curve; CHD: coronary heart disease; CVD: cardiovascular disease; DeepSurv: Deep Neural Network&#x2013;based Cox Model; DeepHit: Deep Neural Network for Competing Risks; GBSA: Gradient Boosted Survival Analysis; RSF: Random Survival Forest; XGBS: Extreme Gradient Boosted Survival Model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e76659_fig03.png"/></fig><p><xref ref-type="fig" rid="figure4">Figure 4</xref> presents the calibration plot for the predicted 10-year composite CVD risk using the MT-BERT model, stratified by gender. Predicted risks and observed event rates were plotted across deciles of predicted risk. In both genders, predicted risks increased monotonically across deciles, indicating good overall calibration. However, risk overestimation was evident in the upper deciles, particularly among males, suggesting inflation at the highest predicted risk levels. In females, predicted and observed risks were closely aligned in lower-risk and midrisk ranges, though moderate overprediction was also observed at the top deciles, where event rates were more variable. In particular, a dip in the observed rates was seen in the ninth decile, which we attribute to event sparsity and statistical variability in the highest risk groups. 
These findings are consistent with the Brier scores in <xref ref-type="table" rid="table3">Table 3</xref>, where males had a score of 0.130 and females had 0.091, reflecting slightly better overall calibration in women. Taken together, the hybrid model provided reasonably calibrated risk estimates for composite CVD, with modest overprediction at higher predicted risk levels, especially in males.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Calibration plots for 10-year cardiovascular disease risk by gender. CVD: cardiovascular disease.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e76659_fig04.png"/></fig><p>KM survival curves for 10-year composite CVD risks are presented in <xref ref-type="fig" rid="figure5">Figure 5</xref>, stratified by predicted risk groups in both genders. Participants were classified into high- and low-risk categories using gender-specific thresholds: 40% or greater for men and 34% or greater for women. In both genders, individuals in the high-risk group exhibited significantly lower event-free survival over time, confirming the model&#x2019;s ability to stratify patients according to long-term risk. Among males, the 10-year event-free probability declined to approximately 74% in the high-risk group, compared to over 93% in the low-risk group. A similar pattern was observed in females, with high-risk individuals showing notably steeper declines in survival probability. Log-rank tests yielded <italic>P</italic> values &#x003C;.001 in both genders, indicating statistically significant separation between risk groups. 
These results support the prognostic utility of the model&#x2019;s predicted risk scores, demonstrating effective stratification for composite CVD outcomes over a 10-year follow-up period.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Kaplan-Meier survival curves for 10-year cardiovascular disease risk, stratified by predicted risk groups by gender.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e76659_fig05.png"/></fig><p>Ethnicity (n=9) subgroups include White, Indian, Pakistani, Bangladeshi, Chinese, Other Asian, Black Caribbean, Black African, and Other ethnic groups. Ethnicity (n=3) aggregates broader categories into White, Asian (including Indian, Pakistani, Bangladeshi, Chinese, and Other Asian), and Black (Caribbean and African). Deprivation (n=5) refers to Townsend quintiles, while deprivation (n=10) refers to Townsend deciles.</p></sec><sec id="s3-3"><title>Model Fairness and Generalizability Across Demographics</title><p>We assessed the performance of the MT-BERT model on the composite CVD outcome across ethnic and deprivation subgroups in the spatial validation set, separately by gender. <xref ref-type="fig" rid="figure6">Figure 6</xref> shows AUROC values with 95% CIs for major ethnic and deprivation categories. <xref ref-type="table" rid="table4">Table 4</xref> presents <italic>I</italic>&#x00B2; and Q statistics based on both broad and more granular subgroup definitions. All results pertain to the composite CVD outcome.</p><p>Among all patients, the AUROC for composite CVD was 0.736 (95% CI 0.729&#x2010;0.741) in males and 0.775 (95% CI 0.768&#x2010;0.780) in females. These represent a moderate decline compared to the internal test set, though overall generalizability remained acceptable. 
Other performance metrics are provided in Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, where similar trends were observed&#x2014;slight reductions compared to internal validation, but performance remained within a reasonable range. Across all subgroups, model discrimination was consistently higher in females than in males (<xref ref-type="fig" rid="figure6">Figure 6</xref>), with an average AUROC difference of approximately 0.04&#x2010;0.05 between genders. This pattern was consistent with results from the training, validation, and test sets (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>; <xref ref-type="table" rid="table3">Table 3</xref>).</p><p>In males, model performance in the White ethnic group was marginally higher than in the overall male cohort, though the difference was not statistically significant. AUROC in the Black ethnic group was slightly lower, but the CIs overlapped with the overall population, indicating broadly comparable performance. The lowest AUROC was observed in the South Asian group, where a more pronounced decline was seen, and the CIs did not overlap with those of the White or overall male population, suggesting reduced discriminative ability in this subgroup. In females, similar patterns were observed. AUROC remained relatively consistent across the 3 main ethnic groups, though a modest decline was again noted in South Asian females, and a more notable drop was observed in Black females. When aggregated into 3 ethnic groups, <italic>I</italic>&#x00B2; values were 93.03% in males and 91.41% in females, indicating substantial heterogeneity (<xref ref-type="table" rid="table4">Table 4</xref>). 
Even with finer stratification into 9 ethnic groups, <italic>I</italic>&#x00B2; remained elevated (71.51% in males and 77.06% in females), suggesting that differences in model discrimination were not solely driven by broad grouping.</p><p>For deprivation, <xref ref-type="fig" rid="figure6">Figure 6</xref> shows a decreasing trend in AUROC from the least to the most deprived groups. In females, AUROC declined steadily across the most deprived 40%. In males, performance dropped notably in the 60%&#x2010;80% deprivation quantile but showed partial recovery in the most deprived group. Correspondingly, heterogeneity in deprivation-based analyses remained high, with <italic>I</italic>&#x00B2; values of 83.34% (quintile-based) and 74.91% (decile-based) in males, and 82.73% and 81.55%, respectively, in females (<xref ref-type="table" rid="table4">Table 4</xref>).</p><p>Overall, heterogeneity across both ethnicity and deprivation subgroups was notable, with <italic>I</italic>&#x00B2; consistently above 70% and all Q-tests statistically significant (<italic>P</italic>&#x003C;.001), except for the 9-category ethnicity analysis in males. These findings suggest that while the model maintained acceptable generalizability, subgroup-level variability in performance exists and may warrant further investigation in future work.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Assessment of model fairness across ethnic and deprivation subgroups (&#x201C;spatial&#x201D; validation set). 
AUROC: area under the receiver operating characteristic curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e76659_fig06.png"/></fig><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Heterogeneity (<italic>I</italic>&#x00B2; and Q Statistics) by ethnicity and deprivation subgroups in the &#x201C;spatial&#x201D; validation set.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Subgroup (n)</td><td align="left" valign="bottom" colspan="2">Male</td><td align="left" valign="bottom" colspan="2">Female</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><italic>I</italic>&#x00B2; (%)</td><td align="left" valign="top">Q (<italic>P</italic> value)</td><td align="left" valign="top"><italic>I</italic>&#x00B2; (%)</td><td align="left" valign="top">Q (<italic>P</italic> value)</td></tr></thead><tbody><tr><td align="left" valign="top">Ethnicity (3)</td><td align="left" valign="top">93.03</td><td align="left" valign="top">28.70 (<italic>P</italic>&#x003C;.001)</td><td align="left" valign="top">91.41</td><td align="left" valign="top">23.28 (<italic>P</italic>&#x003C;.001)</td></tr><tr><td align="left" valign="top">Ethnicity (9)</td><td align="left" valign="top">71.51</td><td align="left" valign="top">28.08 (<italic>P</italic>&#x003C;.001)</td><td align="left" valign="top">77.06</td><td align="left" valign="top">34.87 (<italic>P</italic>&#x003C;.001)</td></tr><tr><td align="left" valign="top">Deprivation (5)</td><td align="left" valign="top">83.34</td><td align="left" valign="top">24.01 (<italic>P</italic>&#x003C;.001)</td><td align="left" valign="top">82.73</td><td align="left" valign="top">23.16 (<italic>P</italic>&#x003C;.001)</td></tr><tr><td align="left" valign="top">Deprivation (10)</td><td align="left" valign="top">74.91</td><td align="left" valign="top">35.87 (<italic>P</italic>&#x003C;.001)</td><td 
align="left" valign="top">81.55</td><td align="left" valign="top">48.78 (<italic>P</italic>&#x003C;.001)</td></tr></tbody></table></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The proposed hybrid MT-BERT model demonstrated strong performance in predicting 10-year CVD risk by integrating structured variables and free-text clinical notes from EHRs. While previous studies have explored the use of clinical text for risk prediction [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], few have focused on long-term CVD outcomes or used integrated multitask modeling. In this study, the model achieved high discrimination, demonstrated good calibration across predicted risk groups, and enabled effective survival stratification based on KM curves. Compared to conventional and ML-based survival models, the inclusion of unstructured text improved performance, particularly in females. These findings were consistent across both the internal test set and a demographically distinct &#x201C;spatial external&#x201D; validation cohort from London, supporting the model&#x2019;s generalizability and potential clinical utility.</p><p>A key methodological contribution of this study is the integration of structured and unstructured EHR data within a BERT-MLP architecture, optimized through a unified multitask learning framework. Compared to categorical or binary representations, textual input can provide richer information about disease severity, temporal context, or interactions between conditions that structured variables may not explicitly encode. Moreover, transforming structured features into natural language text reduces data sparsity and produces denser, more learnable representations [<xref ref-type="bibr" rid="ref29">29</xref>]. 
This transformation is particularly suited for BERT, which is optimized for NLP and benefits from pretrained language representations that enhance its ability to model semantic relationships, even in low-resource settings or when encountering rare comorbidities [<xref ref-type="bibr" rid="ref30">30</xref>]. Thus, our approach allows for both increased model expressiveness and improved generalization to underrepresented patient profiles.</p><p>Building on prior work in deep survival modeling, such as DeepSurv [<xref ref-type="bibr" rid="ref12">12</xref>] and DeepHit [<xref ref-type="bibr" rid="ref26">26</xref>], the model incorporates a classification objective to support threshold-based clinical decision-making. By jointly minimizing a combined Focal and Cox-based loss, it enables simultaneous optimization of binary classification (AUROC) and survival risk ranking (C-index). This dual-objective design allows the model to capture both event occurrence and timing, enhancing its applicability in real-world risk stratification.</p><p>To ensure comparability, data extraction followed the protocol from our previous work [<xref ref-type="bibr" rid="ref24">24</xref>] on ML-based survival models for CVD, maintaining consistency in preprocessing and enabling direct performance comparisons. Under this aligned experimental setup, the hybrid model outperformed earlier conventional and ML-based models, demonstrating the effectiveness of combining representation learning with multiobjective optimization. Importantly, these comparators already realize the principal ablation settings within an identical pipeline, namely, structure-only survival modeling with a single Cox objective (CoxPH, RSF, and DeepSurv). 
They therefore provide a pragmatic assessment of modality and loss without reimplementing stripped-down variants of MT-BERT.</p><p>Furthermore, compared to training separate models for classification and survival analysis, our multitask learning framework offers several advantages. First, by sharing representations between tasks, the model captures both static risk factors and temporal dynamics more efficiently, leveraging complementary information to enhance overall predictive performance [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Binary classification and survival modeling address distinct but related goals [<xref ref-type="bibr" rid="ref12">12</xref>]: classification focuses on identifying individuals at high immediate risk, while survival modeling captures the time-dependent risk trajectory. Learning these jointly allows the model to optimize decision thresholds and risk ranking simultaneously. Additionally, shared representations act as a form of regularization, improving generalization especially in underrepresented subgroups [<xref ref-type="bibr" rid="ref31">31</xref>]. Although we did not run a full factorial ablation, the existing comparators already realize the principal ablation settings within the same pipeline, namely, structure-only survival modeling with a single Cox objective. The consistent gains in both discrimination and calibration indicate a benefit from multitask integration, and we will conduct targeted ablations in future work to quantify incremental contributions beyond these baselines.</p><p>Despite these overall gains, subgroup analyses revealed reduced discrimination in certain populations, particularly males from deprived and ethnic minority groups. Similar disparities have been observed in other clinical ML applications [<xref ref-type="bibr" rid="ref33">33</xref>], underscoring the importance of fairness-aware evaluation and algorithm refinement. 
Substantial heterogeneity (<italic>I</italic>&#x00B2;&#x003E;70%) across ethnicity and deprivation strata further highlights the need for subgroup-specific performance auditing. These findings emphasize the necessity of addressing demographic variability to support equitable deployment of predictive models in diverse populations.</p></sec><sec id="s4-2"><title>Limitations</title><p>This study has several limitations that merit consideration. While the hybrid MT-BERT architecture offers enhanced representational capacity by integrating free-text and structured data, it also introduces considerable complexity [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. BERT-based models are computationally intensive, requiring substantially more memory and processing time than MLPs trained solely on structured inputs. The need to process long token sequences increases both training and inference latency, which may limit scalability in resource-constrained environments. Additionally, textual representations derived from BERT may suffer from noise due to variability in clinical phrasing and tokenization. For instance, expressions such as &#x201C;No history of diabetes&#x201D; and &#x201C;Diabetes: none&#x201D; may yield inconsistent embeddings, introducing ambiguity in comorbidity interpretation. This variability in tokenization can further propagate semantic drift, where slight differences in phrasing result in divergent embeddings despite similar clinical meaning. These factors suggest that although BERT provides valuable semantic richness, its use must be balanced against concerns around efficiency, representational stability, and semantic fidelity in EHR settings.</p><p>Another methodological challenge lies in the joint optimization of binary classification (AUROC) and survival ranking (C-index) within a multitask framework. 
Although this design captures complementary dimensions of CVD risk, the 2 objectives are not always easily harmonized. In our results, C-index scores were consistently lower than AUROC, indicating difficulty in modeling TTE information with the same fidelity as event discrimination. In addition, evaluating a true 10-year TTE objective with censoring and aligning predictors with outcome timing typically yields a more conservative AUROC than shorter or time-gated tasks, which helps explain why our discrimination is lower than some reports. Future work may benefit from refined training strategies or alternative loss functions to better align these targets.</p><p>Model comparisons were conducted under a shared experimental framework established in our prior work [<xref ref-type="bibr" rid="ref24">24</xref>], with consistent data extraction and preprocessing across all models. This enabled fair benchmarking but does not eliminate concerns about potential biases from repeated records or unobserved confounding. Given that the same CPRD dataset, variable and outcome definitions, cohort construction strategies, and censoring handling were applied, these models serve as appropriate comparators for evaluating the added value of MT-BERT. Within this like-for-like design, MT-BERT exceeded the conventional and ML baselines from our previous benchmark, with gains that were modest in absolute terms yet consistent across outcomes, and with clearer KM separation and better calibration. Independent replication in other settings will nonetheless be critical to validate generalizability.</p><p>The model also showed differential performance across outcomes. Prediction was most effective for composite CVD events, likely due to broader definitions and higher case counts [<xref ref-type="bibr" rid="ref34">34</xref>]. In contrast, individual outcomes such as MI and vascular dementia were more difficult to model, reflecting data sparsity and noisier labels. 
MI is commonly harder because many events are precipitated by acute triggers that are not present in baseline features, which limits discriminability over a 10-year horizon. These findings highlight the practical value of composite endpoints in primary care, where follow-up is long and individual outcomes are rare. Future work will incorporate richer short-term and time-varying signals, for example, recent trajectories of vitals and laboratory results, medication changes, secondary-care encounters, and uncertainty-aware learning, to strengthen outcome-specific prediction.</p><p>Although generalizability was demonstrated between the development and spatial external validation sets, the model has not yet been evaluated across other health care systems or geographic regions. The London cohort provided important demographic diversity, especially in ethnicity and deprivation, but broader validation is required. Notably, performance declined in South Asian and socioeconomically deprived males, potentially due to lower case numbers and greater uncertainty. This highlights the need for targeted data enrichment and uncertainty-aware learning in underrepresented subgroups. Fairness-related issues also remain. While subgroup audits were conducted, mitigation strategies such as sample reweighting or recalibration were not applied, and systematic subgroup-specific auditing will be required before equitable clinical deployment can be ensured.</p><p>From an implementation perspective, the present work relied on DistilBERT, a relatively lightweight model pretrained on general rather than medical corpora. This choice reduced computational cost, allowing training within hours on high-performance computing environments and inference within seconds on standard CPUs, demonstrating feasibility for near real-time use in primary care dashboards, such as flagging high-risk patients for recall or displaying updated risk estimates during consultations. 
However, the absence of domain-specific pretraining remains a limitation, and larger biomedical language models trained on richer corpora are likely to achieve superior performance in the future. While deployment at scale is unlikely to pose major challenges, training requirements will ultimately depend on the size and complexity of the pretrained models used, and these aspects will need to be formally evaluated.</p><p>Interpretability also remains a key barrier to adoption. Deep learning models are often considered &#x201C;black boxes,&#x201D; in contrast to conventional statistical methods [<xref ref-type="bibr" rid="ref6">6</xref>]. For structured variables, attribution methods such as SHapley Additive exPlanations (SHAP) can provide ranked contributions, enabling clinicians to see which risk factors (eg, blood pressure and diabetes status) were most influential. For clinical text, attention mechanisms can highlight salient tokens or phrases (eg, &#x201C;chest pain&#x201D; and &#x201C;family history of stroke&#x201D;) that received higher model weights, offering a complementary perspective. These cross-modal explanations require distinct visualization strategies, and we did not implement a systematic framework for presenting them to clinicians in this study. While such tools may improve explainability, the trade-off between predictive performance and interpretability must be carefully considered [<xref ref-type="bibr" rid="ref35">35</xref>]. In contexts where textual data add limited value, simpler approaches such as RSF may provide comparable results with greater transparency. 
For long-term CVD prediction, robust risk stratification may ultimately be more clinically relevant than token-level explanations, yet future research should examine how to deliver outputs in formats that are both faithful to the model and usable in practice.</p><p>Importantly, this model is not intended to replace established risk tools such as QRISK4 [<xref ref-type="bibr" rid="ref36">36</xref>], which have undergone extensive validation. Rather, it illustrates the potential of combining structured and unstructured EHR data within a multitask learning framework to support multioutcome prediction. Prospective validation in external health systems, integration into clinical care pathways, development of clinician-facing interpretability tools, and incorporation of fairness-aware strategies will be essential to translate these preliminary findings into real-world practice.</p></sec><sec id="s4-3"><title>Conclusion</title><p>This study presents a multitask deep learning model for 10-year CVD risk prediction using real-world EHR data, integrating structured variables and unstructured clinical text through an MT-BERT architecture. By jointly optimizing classification and survival objectives, the model achieves strong discrimination, robust calibration, and meaningful risk stratification over a 10-year follow-up. Performance remained consistent in a demographically diverse London subgroup, supporting its generalizability. Although improvements over previously tested ML-based models were modest, these findings provide preliminary evidence that combining representation learning and multiobjective optimization is a promising direction for risk prediction in primary care. However, subgroup differences across ethnicity and deprivation highlight persistent fairness concerns. 
Future work should focus on improving interpretability, validating in external populations, and evaluating clinical impact to support real-world implementation.</p></sec></sec></body><back><ack><p>VC is supported by the Engineering and Physical Sciences Research Council (EPSRC)-funded King&#x2019;s Health Partners Digital Health Hub (EP/X030628/1). TL is a PhD student at KCL&#x2019;s DRIVE-Health CDT supported by Metadvice Ltd. and the National Institute for Health Research (NIHR) Biomedical Research Centre based at Guy&#x2019;s and St Thomas&#x2019; NHS Foundation Trust and King&#x2019;s College London (IS-BRC-1215-20006). The views expressed are those of the author(s) and not necessarily those of the NHS, the NIHR, or the Department of Health.</p></ack><notes><sec><title>Data Availability</title><p>The CPRD Aurum EHR dataset used in this study is accessible through a formal application [<xref ref-type="bibr" rid="ref37">37</xref>] (<ext-link ext-link-type="uri" xlink:href="https://www.cprd.com/data-access">https://www.cprd.com/data-access</ext-link>). The model implementation, algorithm details, and technical components are openly available within the article and on GitHub [<xref ref-type="bibr" rid="ref38">38</xref>].</p></sec></notes><fn-group><fn fn-type="con"><p>TL developed the deep learning model, performed the statistical analyses, generated the results figures and tables, and prepared the initial manuscript draft under the guidance of VC and LL. YW, AK, and LL contributed to manuscript revisions and provided valuable insights. 
All authors critically reviewed the manuscript and approved the final version for publication.</p></fn><fn fn-type="conflict"><p>AK serves as the Chief Medical Officer at Metadvice, a company specializing in precision medicine technologies.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AUROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term id="abb4">C-index</term><def><p>concordance index</p></def></def-item><def-item><term id="abb5">CALIBER</term><def><p>Clinical research using Linked Bespoke studies and Electronic health Records</p></def></def-item><def-item><term id="abb6">CALIBER</term><def><p>CArdiovascular disease research using LInked BEspoke studies and Electronic health Records</p></def></def-item><def-item><term id="abb7">CHD</term><def><p>coronary heart disease</p></def></def-item><def-item><term id="abb8">CPRD</term><def><p>Clinical Practice Research Datalink</p></def></def-item><def-item><term id="abb9">SNOMED CT</term><def><p>Systematized Nomenclature of Medicine &#x2013; Clinical Terms</p></def></def-item><def-item><term id="abb10">CVD</term><def><p>cardiovascular disease</p></def></def-item><def-item><term id="abb11">DeepHit</term><def><p>Deep Neural Network for Competing Risks</p></def></def-item><def-item><term id="abb12">DeepSurv</term><def><p>Deep Neural Network-based Cox Model</p></def></def-item><def-item><term id="abb13">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb14">GBSA</term><def><p>Gradient Boosted Survival Analysis</p></def></def-item><def-item><term id="abb15">GP</term><def><p>general practitioner</p></def></def-item><def-item><term 
id="abb16"><italic>ICD-10</italic></term><def><p><italic>International Classification of Diseases, Tenth Revision</italic></p></def></def-item><def-item><term id="abb17">ISAC</term><def><p>Independent Scientific Advisory Committee</p></def></def-item><def-item><term id="abb18">KM</term><def><p>Kaplan-Meier</p></def></def-item><def-item><term id="abb19">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb20">MLP</term><def><p>multilayer perceptron</p></def></def-item><def-item><term id="abb21">NICE</term><def><p>National Institute for Health and Care Excellence</p></def></def-item><def-item><term id="abb22">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb23">RSF</term><def><p>Random Survival Forest</p></def></def-item><def-item><term id="abb24">SBP</term><def><p>systolic blood pressure</p></def></def-item><def-item><term id="abb25">SHAP</term><def><p>SHapley Additive exPlanations</p></def></def-item><def-item><term id="abb26">TRIPOD</term><def><p>Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis</p></def></def-item><def-item><term id="abb27">TTE</term><def><p>time-to-event</p></def></def-item><def-item><term id="abb28">XGBS</term><def><p>Extreme Gradient Boosting Survival</p></def></def-item><def-item><term id="abb29">XGBS</term><def><p>Extreme Gradient Boosted Survival Model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>UK cardiovascular disease factsheet</article-title><source>British Heart Foundation</source><year>2024</year><month>01</month><access-date>2025-10-28</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.bhf.org.uk/-/media/files/for-professionals/research/heart-statistics/bhf-cvd-statistics-uk-factsheet.pdf?rev=5503842f012d416994618f73f5e76667&#x0026;hash=B25531514E2326EBAD737D565D969EEA">https://www.bhf.org.uk/-/media/files/for-professionals/research/heart-statistics/bhf-cvd-statistics-uk-factsheet.pdf?rev=5503842f012d416994618f73f5e76667&#x0026;hash=B25531514E2326EBAD737D565D969EEA</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>Number of people living with diabetes in the UK tops 5 million for the first time</article-title><source>Diabetes UK</source><year>2023</year><access-date>2025-10-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.diabetes.org.uk/about-us/news-and-views/number-people-living-diabetes-uk-tops-5-million-first-time">https://www.diabetes.org.uk/about-us/news-and-views/number-people-living-diabetes-uk-tops-5-million-first-time</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>Cardiovascular disease: risk assessment and reduction, including lipid modification report no: NG238</article-title><source>National Institute for Health and Care Excellence (NICE)</source><year>2023</year><month>12</month><access-date>2024-07-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.nice.org.uk/guidance/ng238">https://www.nice.org.uk/guidance/ng238</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hippisley-Cox</surname><given-names>J</given-names> </name><name name-style="western"><surname>Coupland</surname><given-names>C</given-names> </name><name name-style="western"><surname>Brindle</surname><given-names>P</given-names> </name></person-group><article-title>Development and validation of QRISK3 risk 
prediction algorithms to estimate future risk of cardiovascular disease: prospective cohort study</article-title><source>BMJ</source><year>2017</year><month>05</month><day>23</day><volume>357</volume><fpage>j2099</fpage><pub-id pub-id-type="doi">10.1136/bmj.j2099</pub-id><pub-id pub-id-type="medline">28536104</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Virani</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Newby</surname><given-names>LK</given-names> </name><name name-style="western"><surname>Arnold</surname><given-names>SV</given-names> </name><etal/></person-group><article-title>2023 AHA/ACC/ACCP/ASPC/NLA/PCNA Guideline for the management of patients with chronic coronary disease: a report of the American Heart Association/American College of Cardiology Joint Committee on Clinical Practice Guidelines</article-title><source>Circulation</source><year>2023</year><month>08</month><day>29</day><volume>148</volume><issue>9</issue><fpage>e9</fpage><lpage>e119</lpage><pub-id pub-id-type="doi">10.1161/CIR.0000000000001168</pub-id><pub-id pub-id-type="medline">37471501</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Krentz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Curcin</surname><given-names>V</given-names> </name></person-group><article-title>Machine learning based prediction models for cardiovascular disease risk using electronic health records data: systematic review and meta-analysis</article-title><source>Eur Heart J Digit 
Health</source><year>2025</year><month>01</month><volume>6</volume><issue>1</issue><fpage>7</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1093/ehjdh/ztae080</pub-id><pub-id pub-id-type="medline">39846062</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Krentz</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Huo</surname><given-names>Z</given-names> </name><name name-style="western"><surname>&#x0106;ur&#x010D;in</surname><given-names>V</given-names> </name></person-group><article-title>Opportunities and challenges of cardiovascular disease risk prediction for primary prevention using machine learning and electronic health records: a systematic review</article-title><source>Rev Cardiovasc Med</source><year>2025</year><month>04</month><volume>26</volume><issue>4</issue><fpage>37443</fpage><pub-id pub-id-type="doi">10.31083/RCM37443</pub-id><pub-id pub-id-type="medline">40351688</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Razieh</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zaccardi</surname><given-names>F</given-names> </name><name name-style="western"><surname>Miksza</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Differences in the risk of cardiovascular disease across ethnic groups: UK Biobank observational study</article-title><source>Nutr Metab Cardiovasc Dis</source><year>2022</year><month>11</month><volume>32</volume><issue>11</issue><fpage>2594</fpage><lpage>2602</lpage><pub-id pub-id-type="doi">10.1016/j.numecd.2022.08.002</pub-id><pub-id pub-id-type="medline">36064688</pub-id></nlm-citation></ref><ref 
id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eastwood</surname><given-names>SV</given-names> </name><name name-style="western"><surname>Mathur</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sattar</surname><given-names>N</given-names> </name><name name-style="western"><surname>Smeeth</surname><given-names>L</given-names> </name><name name-style="western"><surname>Bhaskaran</surname><given-names>K</given-names> </name><name name-style="western"><surname>Chaturvedi</surname><given-names>N</given-names> </name></person-group><article-title>Ethnic differences in guideline-indicated statin initiation for people with type 2 diabetes in UK primary care, 2006-2019: a cohort study</article-title><source>PLoS Med</source><year>2021</year><month>06</month><volume>18</volume><issue>6</issue><fpage>e1003672</fpage><pub-id pub-id-type="doi">10.1371/journal.pmed.1003672</pub-id><pub-id pub-id-type="medline">34185782</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>Socioeconomic inequalities in heart and circulatory diseases in England: an analysis</article-title><source>British Heart Foundation</source><year>2025</year><month>01</month><access-date>2025-10-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.bhf.org.uk/what-we-do/our-research/heart-statistics/health-inequalities-research/inequalities-in-heart-and-circulatory-diseases-in-england">https://www.bhf.org.uk/what-we-do/our-research/heart-statistics/health-inequalities-research/inequalities-in-heart-and-circulatory-diseases-in-england</ext-link></comment></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rajkomar</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Dean</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kohane</surname><given-names>I</given-names> </name></person-group><article-title>Machine learning in medicine</article-title><source>N Engl J Med</source><year>2019</year><month>04</month><day>4</day><volume>380</volume><issue>14</issue><fpage>1347</fpage><lpage>1358</lpage><pub-id pub-id-type="doi">10.1056/NEJMra1814259</pub-id><pub-id pub-id-type="medline">30943338</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Katzman</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Shaham</surname><given-names>U</given-names> </name><name name-style="western"><surname>Cloninger</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bates</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kluger</surname><given-names>Y</given-names> </name></person-group><article-title>DeepSurv: personalized treatment recommender system using a Cox proportional hazards deep neural network</article-title><source>BMC Med Res Methodol</source><year>2018</year><month>02</month><day>26</day><volume>18</volume><issue>1</issue><fpage>24</fpage><pub-id pub-id-type="doi">10.1186/s12874-018-0482-1</pub-id><pub-id pub-id-type="medline">29482517</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shickel</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tighe</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Bihorac</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Rashidi</surname><given-names>P</given-names> </name></person-group><article-title>Deep EHR: a survey of recent advances in deep learning techniques for electronic health record (EHR) analysis</article-title><source>IEEE J Biomed Health Inform</source><year>2018</year><month>09</month><volume>22</volume><issue>5</issue><fpage>1589</fpage><lpage>1604</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2017.2767063</pub-id><pub-id pub-id-type="medline">29989977</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><source>arXiv</source><comment>Preprint posted online on  May 24, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1810.04805</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="report"><article-title>CPRD Aurum January 2022</article-title><year>2022</year><publisher-name>Clinical Practice Research Datalink</publisher-name><pub-id pub-id-type="doi">10.48329/DB7T-AY41</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><name name-style="western"><surname>Dhiman</surname><given-names>P</given-names> </name><etal/></person-group><article-title>TRIPOD+AI statement: updated 
guidance for reporting clinical prediction models that use regression or machine learning methods</article-title><source>BMJ</source><year>2024</year><month>04</month><day>16</day><volume>385</volume><fpage>e078378</fpage><pub-id pub-id-type="doi">10.1136/bmj-2023-078378</pub-id><pub-id pub-id-type="medline">38626948</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Riley</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Snell</surname><given-names>KI</given-names> </name><name name-style="western"><surname>Ensor</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Minimum sample size for developing a multivariable prediction model: PART II &#x2010; binary and time&#x2010;to&#x2010;event outcomes</article-title><source>Stat Med</source><year>2019</year><month>03</month><day>30</day><volume>38</volume><issue>7</issue><fpage>1276</fpage><lpage>1296</lpage><pub-id pub-id-type="doi">10.1002/sim.7992</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thayer</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Mumtaz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Elmessary</surname><given-names>MA</given-names> </name><etal/></person-group><article-title>Creating a next-generation phenotype library: the health data research UK Phenotype Library</article-title><source>JAMIA Open</source><year>2024</year><month>07</month><volume>7</volume><issue>2</issue><fpage>ooae049</fpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooae049</pub-id><pub-id pub-id-type="medline">38895652</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Denaxas</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gonzalez-Izquierdo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Direk</surname><given-names>K</given-names> </name><etal/></person-group><article-title>UK phenomics platform for developing and validating electronic health record phenotypes: CALIBER</article-title><source>J Am Med Inform Assoc</source><year>2019</year><month>12</month><day>1</day><volume>26</volume><issue>12</issue><fpage>1545</fpage><lpage>1559</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz105</pub-id><pub-id pub-id-type="medline">31329239</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Yan</surname><given-names>L</given-names> </name></person-group><article-title>QRISK3: 10-year cardiovascular disease risk calculator (QRISK3 2017)</article-title><source>ClinRisk Ltd</source><year>2019</year><access-date>2025-10-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://doi.org/10.32614/CRAN.package.QRISK3">https://doi.org/10.32614/CRAN.package.QRISK3</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Sanh</surname><given-names>V</given-names> </name><name name-style="western"><surname>Debut</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chaumond</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wolf</surname><given-names>T</given-names> </name></person-group><article-title>DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 1, 2020</comment><pub-id 
pub-id-type="doi">10.48550/arXiv.1910.01108</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Kendall</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gal</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Cipolla</surname><given-names>R</given-names> </name></person-group><article-title>Multi-task learning using uncertainty to weigh losses for scene geometry and semantics</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 24, 2018</comment><pub-id pub-id-type="doi">10.48550/arXiv.1705.07115</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Higgins</surname><given-names>JPT</given-names> </name><name name-style="western"><surname>Thompson</surname><given-names>SG</given-names> </name><name name-style="western"><surname>Deeks</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name></person-group><article-title>Measuring inconsistency in meta-analyses</article-title><source>BMJ</source><year>2003</year><month>09</month><day>6</day><volume>327</volume><issue>7414</issue><fpage>557</fpage><lpage>560</lpage><pub-id pub-id-type="doi">10.1136/bmj.327.7414.557</pub-id><pub-id pub-id-type="medline">12958120</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Krentz</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Curcin</surname><given-names>V</given-names> </name></person-group><article-title>Benchmarking survival machine learning models for 10-year cardiovascular disease risk prediction using large-scale electronic health records</article-title><source>Digit Health</source><year>2025</year><month>04</month><day>30</day><comment><ext-link ext-link-type="uri" xlink:href="https://kclpure.kcl.ac.uk/portal/en/publications/benchmarking-survival-machine-learning-models-for-10-year-cardiov/">https://kclpure.kcl.ac.uk/portal/en/publications/benchmarking-survival-machine-learning-models-for-10-year-cardiov/</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Davidson-Pilon</surname><given-names>C</given-names> </name></person-group><article-title>Lifelines: survival analysis in Python</article-title><source>J Open Source Softw</source><year>2019</year><month>08</month><day>4</day><volume>4</volume><issue>40</issue><fpage>1317</fpage><pub-id pub-id-type="doi">10.21105/joss.01317</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zame</surname><given-names>W</given-names> </name><name name-style="western"><surname>Yoon</surname><given-names>J</given-names> </name><name name-style="western"><surname>Van der Schaar</surname><given-names>M</given-names> </name></person-group><article-title>DeepHit: a deep learning approach to survival analysis with competing risks</article-title><conf-name>Proceedings of the AAAI Conference on Artificial Intelligence</conf-name><conf-date>Feb 2-7, 2018</conf-date><pub-id 
pub-id-type="doi">10.1609/aaai.v32i1.11842</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weng</surname><given-names>SF</given-names> </name><name name-style="western"><surname>Reps</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kai</surname><given-names>J</given-names> </name><name name-style="western"><surname>Garibaldi</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Qureshi</surname><given-names>N</given-names> </name></person-group><article-title>Can machine-learning improve cardiovascular risk prediction using routine clinical data?</article-title><source>PLoS ONE</source><year>2017</year><volume>12</volume><issue>4</issue><fpage>e0174944</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0174944</pub-id><pub-id pub-id-type="medline">28376093</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>A</given-names> </name><name name-style="western"><surname>PourNejatian</surname><given-names>N</given-names> </name><etal/></person-group><article-title>A large language model for electronic health records</article-title><source>npj Digit Med</source><volume>5</volume><issue>1</issue><fpage>194</fpage><pub-id pub-id-type="doi">10.1038/s41746-022-00742-2</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Mitra</surname><given-names>A</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>W</given-names> 
</name><name name-style="western"><surname>Berlowitz</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>H</given-names> </name></person-group><article-title>TransformEHR: transformer-based encoder-decoder generative model to enhance prediction of disease outcomes using electronic health records</article-title><source>Nat Commun</source><year>2023</year><month>11</month><day>29</day><volume>14</volume><issue>1</issue><fpage>7857</fpage><pub-id pub-id-type="doi">10.1038/s41467-023-43715-z</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Alsentzer</surname><given-names>E</given-names> </name><name name-style="western"><surname>Murphy</surname><given-names>JR</given-names> </name></person-group><article-title>Publicly available clinical BERT embeddings</article-title><source>arXiv</source><comment>Preprint posted online on  Jun 20, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1904.03323</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Ruder</surname><given-names>S</given-names> </name></person-group><article-title>An overview of multi-task learning in deep neural networks</article-title><source>arXiv</source><comment>Preprint posted online on  Jun 15, 2017</comment><pub-id pub-id-type="doi">10.48550/ARXIV.1706.05098</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harutyunyan</surname><given-names>H</given-names> </name><name name-style="western"><surname>Khachatrian</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kale</surname><given-names>DC</given-names> </name><name 
name-style="western"><surname>Ver Steeg</surname><given-names>G</given-names> </name><name name-style="western"><surname>Galstyan</surname><given-names>A</given-names> </name></person-group><article-title>Multitask learning and benchmarking with clinical time series data</article-title><source>Sci Data</source><year>2019</year><month>06</month><day>17</day><volume>6</volume><issue>1</issue><fpage>96</fpage><pub-id pub-id-type="doi">10.1038/s41597-019-0103-9</pub-id><pub-id pub-id-type="medline">31209213</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Colacci</surname><given-names>M</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>YQ</given-names> </name><name name-style="western"><surname>Postill</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Sociodemographic bias in clinical machine learning models: a scoping review of algorithmic bias instances and mechanisms</article-title><source>J Clin Epidemiol</source><year>2025</year><month>02</month><volume>178</volume><fpage>111606</fpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2024.111606</pub-id><pub-id pub-id-type="medline">39532254</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rajkomar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Oren</surname><given-names>E</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Scalable and accurate deep learning with electronic health records</article-title><source>NPJ Digit Med</source><year>2018</year><month>05</month><day>8</day><volume>1</volume><issue>1</issue><fpage>18</fpage><pub-id 
pub-id-type="doi">10.1038/s41746-018-0029-1</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Lundberg</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SI</given-names> </name></person-group><article-title>A unified approach to interpreting model predictions</article-title><source>arXiv</source><comment>Preprint posted online on  Nov 25, 2017</comment><pub-id pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hippisley-Cox</surname><given-names>J</given-names> </name><name name-style="western"><surname>Coupland</surname><given-names>CAC</given-names> </name><name name-style="western"><surname>Bafadhel</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Development and validation of a new algorithm for improved cardiovascular risk prediction</article-title><source>Nat Med</source><year>2024</year><month>05</month><volume>30</volume><issue>5</issue><fpage>1440</fpage><lpage>1447</lpage><pub-id pub-id-type="doi">10.1038/s41591-024-02905-y</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="web"><article-title>Access to data</article-title><source>CPRD</source><access-date>2025-10-30</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cprd.com/access-data">https://www.cprd.com/access-data</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="web"><article-title>TLiuBB/bert_paper</article-title><source>GitHub</source><access-date>2025-10-28</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://github.com/TLiuBB/BERT_paper">https://github.com/TLiuBB/BERT_paper</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary tables providing detailed information on model features, disease definitions, software packages, and extended performance metrics of the multitask BERT model across cardiovascular disease outcomes by gender and dataset split.</p><media xlink:href="medinform_v13i1e76659_app1.docx" xlink:title="DOCX File, 48 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Supplementary file containing phenotype definitions used in this study, including cardiovascular disease outcomes, comorbidities, and related risk factors, provided as individual CSV files.</p><media xlink:href="medinform_v13i1e76659_app2.zip" xlink:title="ZIP File, 77 KB"/></supplementary-material><supplementary-material id="app3"><label>Checklist 1</label><p>Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis checklist.</p><media xlink:href="medinform_v13i1e76659_app3.pdf" xlink:title="PDF File, 13389 KB"/></supplementary-material></app-group></back></article>