<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e84261</article-id><article-id pub-id-type="doi">10.2196/84261</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Bridging Population Patterns and Individual Prediction: Framework for Prospective Multimorbidity Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Qianyao</given-names></name><degrees>BBA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Zhang</surname><given-names>Runtong</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ma</surname><given-names>Weiguang</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhao</surname><given-names>Butian</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhu</surname><given-names>Xiaomin</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Information Management, School of Economics and Management, Beijing Jiaotong University</institution><addr-line>No.3 Shangyuancun, Haidian District</addr-line><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff2"><institution>School of Management, Beijing University of Chinese Medicine</institution><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff3"><institution>School of Mechanical, Electronic and Control Engineering, Beijing Jiaotong University</institution><addr-line>Beijing</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Jun</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Dong</surname><given-names>Lingfeng</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Hossain</surname><given-names>Md Zakir</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Lu</surname><given-names>Qincheng</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Niu</surname><given-names>Wanshu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Shi</surname><given-names>Xin</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Runtong Zhang, Prof Dr, Department of Information Management, School of Economics and Management, Beijing 
Jiaotong University, No.3 Shangyuancun, Haidian District, Beijing, 100044, China, 86 010 51683854; <email>rtzhang@bjtu.edu.cn</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>10</day><month>3</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e84261</elocation-id><history><date date-type="received"><day>16</day><month>09</month><year>2025</year></date><date date-type="accepted"><day>29</day><month>01</month><year>2026</year></date></history><copyright-statement>&#x00A9; Qianyao Zhang, Runtong Zhang, Weiguang Ma, Butian Zhao, Xiaomin Zhu. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 10.3.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e84261"/><abstract><sec><title>Background</title><p>Multimorbidity has become a major global public health challenge. 
However, existing research primarily emphasizes the identification of disease patterns at the population level and lacks the capacity to provide predictive insights into individual future pattern membership. Bridging this gap is crucial for personalized prevention and management.</p></sec><sec><title>Objective</title><p>This study aims to propose an innovative framework that integrates population-level multimorbidity pattern recognition with individual-level predictive modeling, thus advancing multimorbidity research from descriptive analysis to prospective multimorbidity pattern prediction.</p></sec><sec sec-type="methods"><title>Methods</title><p>Using longitudinal health follow-up data, we first applied latent transition analysis (LTA) to identify temporally stable multimorbidity patterns. These patterns were subsequently transformed into predictive labels to construct a novel deep learning model, CLA-Net (Cross-Lag Attention Network). CLA-Net is designed to predict individual future multimorbidity patterns by leveraging the complementary strengths of Gated Recurrent Units (GRU) and transformer architectures. It introduces a bitemporal directed cross-attention mechanism to simultaneously capture temporal dependencies and complex feature interactions. We compared CLA-Net against several advanced baselines and conducted ablation studies to validate its architectural components.</p></sec><sec sec-type="results"><title>Results</title><p>In terms of pattern recognition, the LTA identified 5 clinically meaningful multimorbidity patterns: Cardiometabolic-Multisystem, Hypertension-Arthritis, Respiratory-Musculoskeletal, Metabolic Syndrome, and Gastritis-Arthritis. In terms of prediction, experimental results demonstrated that CLA-Net significantly outperformed all baseline models. CLA-Net achieved an accuracy of 0.8352 (SD 0.0048), a precision of 0.8326 (SD 0.0053), a recall of 0.8312 (SD 0.0056), and an <italic>F</italic><sub>1</sub>-score of 0.8319 (SD 0.0051). 
Notably, it achieved an area under the curve of 0.9293, surpassing baseline models. Ablation studies confirmed the necessity of the dual-branch architecture and the directed cross-attention mechanism, as removing these components resulted in performance declines ranging from 0.93% to 2.50%.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study extends the scope of LTA beyond descriptive statistical modeling and establishes the scientific value of multimorbidity pattern prediction as an independent research task. By bridging population-level insights with individual-level prediction, the proposed framework provides a data-driven tool for the prospective prediction of future multimorbidity pattern membership conditional on survival, thereby supporting stratified disease management and care planning, rather than general risk stratification for acute or end-stage deterioration. This offers new methodological and practical value for precision medicine and public health policymaking.</p></sec></abstract><kwd-group><kwd>multimorbidity</kwd><kwd>latent transition analysis</kwd><kwd>LTA</kwd><kwd>deep learning</kwd><kwd>population-level patterns</kwd><kwd>personalized medicine</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Multimorbidity, commonly defined as the coexistence of 2 or more chronic conditions within a single individual [<xref ref-type="bibr" rid="ref1">1</xref>], has emerged as one of the most pressing public health challenges of the 21st century amid the accelerating global trend of population aging [<xref ref-type="bibr" rid="ref2">2</xref>]. Worldwide, approximately one-third of adults have 2 or more chronic diseases, and the proportion exceeds 50% among individuals aged 60 years and older [<xref ref-type="bibr" rid="ref3">3</xref>]. 
Multimorbidity not only substantially increases health care resource consumption and imposes a heavy burden on health systems, but also severely compromises patients&#x2019; quality of life, leading to a markedly higher risk of functional disability and premature mortality, thereby greatly increasing the complexity of chronic disease management and clinical decision-making [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Previous studies have demonstrated that the co-occurrence of chronic diseases is not a random phenomenon but tends to manifest as distinct multimorbidity patterns, in which specific combinations of diseases frequently occur in particular populations [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. This suggests the potential existence of shared pathophysiological mechanisms, risk factors, or lifestyle determinants underlying these patterns [<xref ref-type="bibr" rid="ref8">8</xref>]. Therefore, systematically identifying, understanding, and predicting these multimorbidity patterns holds strategic importance for the early detection of high-risk individuals, the advancement of precision medicine, and the optimization of health care resource allocation [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Although various methods for identifying multimorbidity patterns have been proposed, such as factor analysis, cluster analysis, latent class analysis (LCA), and network analysis [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref15">15</xref>], they share significant limitations and fall short of meeting the clinical demand for personalized and prospective management. 
First, most approaches group populations primarily based on disease prevalence or epidemiological characteristics, assuming homogeneity within groups while overlooking the inherent heterogeneity among individuals [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Such population-level stratification can facilitate the identification of common multimorbidity patterns at a macrolevel but fails to uncover microlevel differences within patterns, which is particularly inadequate for supporting fine-grained individualized management in clinical practice. In reality, even among patients belonging to the same multimorbidity pattern, disease trajectories may differ substantially. For example, some patients experience slow progression and remain stable over a long period, whereas others may rapidly develop multisystem dysfunction [<xref ref-type="bibr" rid="ref17">17</xref>]. Neglecting such heterogeneity may directly result in biased risk assessments and inefficient resource allocation, thereby severely constraining the practical implementation of precision medicine. In addition, existing studies have mainly focused on static multimorbidity pattern mining [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref18">18</xref>], and the methods used often struggle to maintain consistency of pattern meanings in longitudinal data. This instability not only reduces the comparability of research findings but also weakens the ability to capture disease progression, thereby limiting the clinical interpretability and practical value of the conclusions.</p><p>To address the above challenges, latent transition analysis (LTA), a longitudinal latent variable model, has been introduced into health research. LTA can identify stable latent health states across multiple time points and ensure their consistency and comparability over time [<xref ref-type="bibr" rid="ref19">19</xref>]. 
Compared to other longitudinal modeling approaches, LTA offers distinct advantages for identifying multimorbidity evolution. Unlike growth mixture models or latent growth curve models, which primarily characterize continuous trajectories of a single variable (eg, functional decline over time), LTA is specifically designed to model transitions between discrete, qualitative latent statuses, aligning perfectly with the categorical nature of multimorbidity patterns. Furthermore, in contrast to standard Markov chain models that rely on observed states, LTA separates measurement error from true structural transitions by inferring latent classes from multiple observed indicators, thereby ensuring more robust and clinically meaningful pattern identification. By modeling follow-up data, LTA can effectively capture heterogeneity within populations and provide new perspectives for understanding the dynamic evolution of multimorbidity patterns [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. However, LTA is essentially a descriptive statistical method, with its primary focus on identifying and summarizing patterns from historical or current data, while lacking the capacity to predict individual future health states. This limitation hinders its ability to meet urgent clinical needs, such as providing precise early warnings for high-risk individuals, developing timely intervention strategies, and guiding personalized treatment planning. Therefore, constructing predictive models that can accurately forecast individuals&#x2019; future multimorbidity patterns carries substantial clinical value and practical significance.</p><p>Predicting individual future multimorbidity patterns requires accounting for both the temporal evolution of health states and the complex interactions among multidimensional health features. On the one hand, the progression of chronic diseases exhibits marked and highly nonlinear temporal dependencies. 
A patient&#x2019;s current health status not only reflects their immediate physiological condition but also embodies the evolution from the previous health state, reflecting the staged characteristics of disease progression [<xref ref-type="bibr" rid="ref22">22</xref>]. Ignoring such sequential state transitions weakens a model&#x2019;s ability to capture disease progression trends, thereby limiting its capacity to deliver reliable forward-looking predictions. On the other hand, multidimensional health features, including lifestyle factors, preexisting conditions, and functional indicators, are interconnected through intricate nonlinear couplings and synergistic effects. These dynamic interactions drive the formation and evolution of multimorbidity patterns [<xref ref-type="bibr" rid="ref23">23</xref>]. Many conventional statistical and machine learning methods, such as logistic regression, support vector machines, random forests, gradient boosting trees, and k-nearest neighbors (KNN), perform well in modeling static feature associations [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. However, when confronted with high-dimensional, nonlinear, and temporally dependent medical data, their modeling capacity is limited, making it difficult to simultaneously capture nonlinear state transitions and deep couplings among features. 
Therefore, there is an urgent need to develop a model that overcomes these limitations by effectively integrating sequential health state changes and feature coupling effects in longitudinal data, thus enabling precise and prospective prediction of individual future multimorbidity patterns and providing robust data support for clinical decision-making.</p><p>Based on the above discussion, this study proposes an innovative framework that integrates population-level multimorbidity pattern recognition with precise prediction of individual future states, thereby advancing multimorbidity research from macrolevel description to prospective application. Specifically, the framework first uses LTA to identify population-level multimorbidity patterns with temporal consistency and clinical stability from longitudinal follow-up data, and then transforms these patterns into individual predictive labels. On this basis, we designed the Cross-Lag Attention Network (CLA-Net), which leverages immediate longitudinal context to model short-term, nonlinear state transitions and complex feature interactions among health variables, thereby enabling accurate prediction of individual future multimorbidity patterns.</p><p>The contributions of this study can be summarized in 3 aspects. First, we propose a novel cross-paradigm framework that bridges a statistical latent-variable modeling approach with deep representation learning for longitudinal multimorbidity research. Specifically, population-level multimorbidity patterns identified through LTA are leveraged as clinically interpretable supervisory signals for individual-level prediction. 
By integrating latent structure discovery with prospective modeling, this framework overcomes the limitation of traditional multimorbidity studies that are largely restricted to retrospective pattern characterization and establishes multimorbidity pattern prediction as a distinct and clinically meaningful research task.</p><p>Second, we propose a novel hybrid neural network architecture, CLA-Net, for multimorbidity pattern prediction. CLA-Net integrates the inherent strength of Gated Recurrent Units (GRU) in efficiently modeling short-term, nonlinear state transitions with the powerful representational capacity of the transformer architecture in capturing complex feature interactions. This design surpasses the capacity of traditional linear models, enabling deep modeling of the intricate couplings within medical data. Furthermore, CLA-Net incorporates a bitemporal directed cross-attention mechanism, which establishes directional information channels between the preceding and current health states. This mechanism facilitates feature extraction across time steps and dynamic association modeling, allowing the network to simultaneously perceive and effectively integrate dynamic state changes and feature coupling effects within complex longitudinal medical data.</p><p>Third, the experimental results demonstrate that the proposed CLA-Net model significantly outperforms other mainstream baseline models. Further ablation studies confirm that the synergistic integration of GRU and transformer, together with the bitemporal directed cross-attention mechanism, is the key factor driving the performance improvement. 
These findings not only provide strong evidence for the superiority of CLA-Net and the soundness of its design but also offer new insights and paradigms for model development in the field of multimorbidity pattern prediction.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>We designed a novel research framework that integrates multimorbidity pattern recognition with individualized prediction. The overarching idea is to first identify population-level latent multimorbidity patterns with temporal consistency from longitudinal health data, and then build a deep learning model to predict each individual&#x2019;s future pattern membership, thereby establishing a unified framework that bridges population-level recognition and individual-level prediction. As illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>, the proposed framework consists of 3 stages. In the first stage, LTA is applied to longitudinal health records to identify latent multimorbidity patterns at the population level, ensuring that these patterns maintain consistent clinical meaning and comparability across different time points, thus providing a stable and reliable label basis for subsequent individual prediction tasks. In the second stage, based on the LTA-derived pattern labels, we design and construct the CLA-Net, which integrates temporal dependencies and complex feature interactions to enable accurate prediction of individual future pattern membership. In the third stage, we conduct a comprehensive performance evaluation and validation of the prediction model.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Proposed framework for multimorbidity pattern recognition and prediction. 
CLA-Net: Cross-Lag Attention Network; LTA: latent transition analysis.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig01.png"/></fig></sec><sec id="s2-2"><title>Multimorbidity Pattern Identification</title><p>This study used SAS 9.4 (SAS Institute) to apply LTA for identifying the latent class structures of population-level multimorbidity patterns across 5 waves of China Health and Retirement Longitudinal Study (CHARLS) data. LTA is a longitudinal analytic method based on finite mixture models, which can detect unobserved health subgroups (latent statuses) within the sample across multiple time points. By imposing measurement invariance constraints, LTA ensures temporal consistency and stability of the latent classes [<xref ref-type="bibr" rid="ref20">20</xref>], thereby guaranteeing that the identified classes maintain comparable and clinically meaningful interpretations across different time points. This provides stable and reliable classification labels for subsequent individual prediction modeling tasks. To control classification error and enhance model stability, we adopted a 2-step analytic strategy [<xref ref-type="bibr" rid="ref26">26</xref>]. The detailed procedure is as follows: in step 1, we first conducted LCA separately at each time point to explore the optimal number of latent class partitions. In the model specification process, we considered different numbers of latent classes (ranging from 2 to 6) [<xref ref-type="bibr" rid="ref26">26</xref>]. Model fit was evaluated using the Akaike Information Criterion (AIC), Bayesian Information Criterion (BIC), sample-size adjusted BIC (SaBIC), and entropy, to determine the optimal class solution.</p><p>Specifically, the AIC balances model fit and complexity, with smaller values indicating better fit [<xref ref-type="bibr" rid="ref27">27</xref>]. 
The BIC adds a penalty for sample size, favoring more parsimonious models and thus being more conservative in class determination, particularly in large samples [<xref ref-type="bibr" rid="ref28">28</xref>]. The SaBIC further adjusts BIC for sample size, offering greater robustness in large-sample contexts [<xref ref-type="bibr" rid="ref29">29</xref>]. Entropy (ranging from 0 to 1) measures the certainty of individual classification, with values above 0.80 generally indicating high classification accuracy [<xref ref-type="bibr" rid="ref30">30</xref>]. In addition, we used the Bootstrap Likelihood Ratio Test (BLRT) to compare log-likelihood differences between adjacent class models, in order to assess whether adding an extra class significantly improves model fit [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>In step 2, after determining the optimal number of classes, we conducted chi-square tests on the log-likelihood values of each model [<xref ref-type="bibr" rid="ref31">31</xref>] and performed invariance testing of class structures across different measurement time points, in order to verify whether the meanings of multimorbidity classes remained consistent over time [<xref ref-type="bibr" rid="ref31">31</xref>]. This step is a critical prerequisite for ensuring that classes retain the same interpretive meaning across time points, thereby enhancing the interpretability and stability of the model [<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>In this study, LTA is used as a population-level structure discovery tool to identify clinically stable multimorbidity phenotypes, rather than as a component optimized for downstream predictive performance. The primary objective of LTA in our framework is to define a consistent and interpretable latent outcome space that reflects the underlying disease co-occurrence structure at the population level. 
Accordingly, LTA was estimated using the full longitudinal dataset across all available survey waves to maximize phenotype stability and clinical interpretability by leveraging the complete temporal information within the observed follow-up window. Within this 5-wave longitudinal design, the use of all waves enables the identified latent classes and transition patterns to reflect the evolution of multimorbidity structures across the entire observed period, rather than being driven by partial or wave-specific information. This design choice allows the latent class definitions to be less sensitive to sampling variability across individual waves and ensures that the resulting multimorbidity patterns are representative of the population dynamics observed within the study timeframe, rather than tailored to a specific subsample.</p></sec><sec id="s2-3"><title>Proposed CLA-Net Framework</title><sec id="s2-3-1"><title>Multimorbidity Pattern Prediction Dataset</title><p>This study constructed a task-oriented dataset for prospective prediction of multimorbidity patterns based on the CHARLS, a nationally representative longitudinal survey in China. To bridge the gap between population-level pattern discovery and individual-level prediction, we constructed a supervised learning task using LTA-derived latent classes as predictive targets. Since LTA is a probabilistic model, it outputs the posterior probability of an individual belonging to each latent class rather than a deterministic category. To transform these probabilistic outputs into deterministic prediction labels for CLA-Net, we used a maximum posterior probability assignment strategy. 
Specifically, for each individual <italic>n</italic> at time step <italic>t</italic>, the LTA model calculates a posterior probability vector <bold><italic>P</italic></bold><italic><sub>n,t</sub></italic> = [<italic>p</italic><sub>1</sub>, <italic>p</italic><sub>2</sub>,&#x2026;, <italic>p<sub>K</sub></italic>], where <italic>K</italic> is the number of latent patterns. The ground truth label <italic>y<sub>n,t</sub></italic> is assigned to the class with the highest probability:</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>arg</mml:mi><mml:mo>&#x2061;</mml:mo><mml:munder><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mi>K</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:munder><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>P</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>These deterministic labels (<italic>y<sub>n,t</sub></italic>) serve as the supervisory signals (ground truth) for training the CLA-Net.</p><p>We used 2 consecutive follow-up waves as the input window to predict the multimorbidity pattern at the subsequent wave. In CHARLS, the intervals between survey waves are 2&#x2010;3 years, which align with the slow evolution and long-term accumulation process of chronic diseases. 
Although the intervals vary slightly, the progression of chronic conditions is characterized by slow variables (ie, gradual progression, stable trends, and limited abrupt changes), making 2&#x2010;3 years sufficient to capture substantial changes in individual health status while reducing noise from short-term fluctuations. This temporal span is clinically reasonable for modeling chronic disease progression. According to the study objectives, we reorganized the raw data using a sliding-window approach to construct 3 temporally progressive subdatasets, each corresponding to a &#x201C;past&#x2013;current &#x2192; future&#x201D; window structure:</p><list list-type="bullet"><list-item><p>Subdataset 1: 2011 features as early input (<bold>x</bold><italic><sub>t&#x2013;1</sub></italic>), 2013 features as current input (<bold>x</bold><italic><sub>t</sub></italic>), and 2015 LTA-derived multimorbidity patterns as prediction labels (<italic>y<sub>t+1</sub></italic>)</p></list-item><list-item><p>Subdataset 2: 2013 features as (<bold>x</bold><italic><sub>t&#x2013;1</sub></italic>), 2015 features as (<bold>x</bold><italic><sub>t</sub></italic>), and 2018 multimorbidity patterns as (<italic>y<sub>t+1</sub></italic>)</p></list-item><list-item><p>Subdataset 3: 2015 features as (<bold>x</bold><italic><sub>t&#x2013;1</sub></italic>), 2018 features as (<bold>x</bold><italic><sub>t</sub></italic>), and 2020 multimorbidity patterns as (<italic>y<sub>t+1</sub></italic>)</p></list-item></list><p>The 3 subdatasets were concatenated by rows to form a unified prediction dataset, structured as triplets (<bold>x</bold><italic><sub>t&#x2013;1</sub></italic>, <bold>x</bold><italic><sub>t</sub></italic>, <italic>y<sub>t+1</sub></italic>), where <bold>x</bold><italic><sub>t&#x2013;1</sub></italic> and <bold>x</bold><italic><sub>t</sub></italic> represent feature vectors from 2 consecutive time points, and <italic>y<sub>t+1</sub></italic> denotes the LTA-identified future multimorbidity pattern label. 
This construction preserves the natural temporal evolution of disease states without relying on long-sequence assumptions, while effectively augmenting the training data. It enables the model to learn multistage health state transitions within the same population, thereby providing a robust data foundation for subsequent individual-level prospective prediction.</p></sec><sec id="s2-3-2"><title>Overview of the Model Architecture</title><p>This study proposes an innovative deep learning model, named the CLA-Net. CLA-Net integrates the sequential state encoding capability of GRU with the global feature interaction advantages of an improved transformer architecture, enabling fine-grained cross-time feature interactions through a bitemporal directed cross-attention mechanism. The model is composed of 4 core modules: an input embedding layer, a temporal GRU encoder, a transformer encoding layer based on the bitemporal directed cross-attention mechanism, and a classifier layer. An overview of the model architecture is shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The architecture of the Cross-Lag Attention Network.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig02.png"/></fig></sec><sec id="s2-3-3"><title>Components of CLA-Net</title><sec id="s2-3-3-1"><title>Feature Embedding Layer</title><p>The input features include continuous variables (eg, biomarkers and cognitive scores) and binary indicator variables (eg, disease diagnoses and medication use). Socioeconomic variables, including total household income and per capita household consumption, were inspected for extreme values prior to normalization. 
Given the heavy-tailed distributions of these variables, a percentile-based outlier handling strategy was applied to mitigate the undue influence of extreme observations while preserving the original scale of the data. Min-max normalization was then used to rescale these variables to a common range, ensuring numerical stability and consistency with the scaling of other continuous inputs used in the model. Binary features were kept unchanged. Subsequently, heterogeneous features are mapped into a unified representation space through learnable linear transformations:</p><disp-formula id="E2"><label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E3"><label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi 
mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn1"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> denotes the raw input features, <inline-formula><mml:math id="ieqn2"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> denotes the embedded representation, <inline-formula><mml:math id="ieqn3"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>D</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn4"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are shared parameters, <italic>D</italic> denotes the input dimension, and 
<italic>d</italic> represents the hidden space dimension.</p></sec><sec id="s2-3-3-2"><title>Temporal GRU Encoder</title><p>After feature embedding, the representations of features from the earlier time point (<bold>X</bold><italic><sub>t&#x2013;1</sub></italic>) and the current time point (<bold>X</bold><italic><sub>t</sub></italic>) are stacked in temporal order into a 2-step sequence, which is then fed into a GRU for temporal modeling. Through the update mechanism of its hidden states, the GRU efficiently captures the sequential evolution of the disease process. This design enables the network to capture the critical short-term progression from <italic>t</italic>&#x2013;1 to <italic>t</italic> and to explicitly model the nonlinear dynamics underlying near-term health state transitions.</p><disp-formula id="E4"><label>(4)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">U</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E5"><label>(5)</label><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle 
displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">U</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <bold>h</bold><italic><sub>init</sub></italic> denotes the initial hidden state, and <bold>h</bold><italic><sub>t&#x2013;1</sub></italic> and <bold>h</bold><italic><sub>t</sub></italic> represent the hidden state outputs at the 2 time steps. The internal computational mechanism of the GRU is defined as follows:</p><disp-formula id="E6"><label>(6)</label><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">r</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">W</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">U</mml:mi></mml:mrow><mml:mi>r</mml:mi></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi 
mathvariant="normal">b</mml:mi></mml:mrow><mml:mi>r</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E7"><label>(7)</label><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">z</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">W</mml:mi></mml:mrow><mml:mi>z</mml:mi></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">U</mml:mi></mml:mrow><mml:mi>z</mml:mi></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">b</mml:mi></mml:mrow><mml:mi>z</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E8"><label>(8)</label><mml:math id="eqn8"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mo stretchy="false">~</mml:mo></mml:mover></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>tanh</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">W</mml:mi></mml:mrow><mml:mi>h</mml:mi></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi 
mathvariant="normal">U</mml:mi></mml:mrow><mml:mi>h</mml:mi></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">r</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">b</mml:mi></mml:mrow><mml:mi>h</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E9"><label>(9)</label><mml:math id="eqn9"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">z</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">z</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="normal">h</mml:mi></mml:mrow><mml:mo stretchy="false">~</mml:mo></mml:mover></mml:mrow><mml:mi>t</mml:mi></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Here, <bold>X</bold><italic><sub>t</sub></italic> denotes the input at time step <italic>t</italic>. <bold>r</bold><italic><sub>t</sub></italic> is the reset gate, which controls how much information from the previous time step is retained. 
<bold>z</bold><italic><sub>t</sub></italic> is the update gate, which determines the proportion of information to be updated at the current time step. <inline-formula><mml:math id="ieqn5"><mml:mover accent="true"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="bold">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>~</mml:mo></mml:mover></mml:math></inline-formula> represents the candidate hidden state. <inline-formula><mml:math id="ieqn6"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> is the sigmoid activation function, and <inline-formula><mml:math id="ieqn7"><mml:mo>&#x2299;</mml:mo></mml:math></inline-formula> denotes elementwise multiplication. <bold>W</bold><italic><sub>*</sub></italic> and <bold>U</bold><italic><sub>*</sub></italic> are learnable weight matrices, and <bold>b</bold><italic><sub>*</sub></italic> are the corresponding bias vectors.</p></sec><sec id="s2-3-3-3"><title>Transformer Encoding Layer</title><sec id="s2-3-3-3-1"><title>Bitemporal Directed Cross-Attention Mechanism</title><p>Although GRUs are effective at encoding immediate longitudinal context via gated state updates, they have limited capacity to explicitly characterize fine-grained interactions among heterogeneous features, especially when the predictive signal arises from short-term, nonlinear state transitions rather than long-range history. In our setting, an individual&#x2019;s future multimorbidity pattern is shaped not only by the recent state representation but also by complex cross-feature influences that operate across adjacent time points. To address this, we adopt an asymmetric attention paradigm, in which features from the current time point are used as the query vector <bold>Q</bold>, while features from the earlier time point serve as the keys (<bold>K</bold>) and values (<bold>V</bold>) in the attention computation, enabling the model to attend to historical representations when updating the current state. 
This design reflects the directional interactions between features across time. For each attention head <italic>j</italic> &#x2208; {1,...,<italic>H</italic>}, the GRU outputs are first projected into the <bold>Q</bold>, <bold>K</bold>, and <bold>V</bold> spaces:</p><disp-formula id="E10"><label>(10)</label><mml:math id="eqn10"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:msub><mml:mi>Q</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:msubsup><mml:mi>W</mml:mi><mml:mi>Q</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>K</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msubsup><mml:mi>W</mml:mi><mml:mi>K</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>V</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msubsup><mml:mi>W</mml:mi><mml:mi>V</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"/></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>Q</italic><sub><italic>j</italic></sub>, <italic>K</italic><sub><italic>j</italic></sub>, and 
<italic>V</italic><sub><italic>j</italic></sub> denote the query, key, and value representations of the <italic>j</italic>th attention head, which are vector-valued representations derived from the input hidden states. <inline-formula><mml:math id="ieqn8"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi>W</mml:mi><mml:mi>Q</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mi>W</mml:mi><mml:mi>K</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mi>W</mml:mi><mml:mi>V</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> denote the projection matrices of the <italic>j</italic>th head, <italic>d</italic> is the hidden dimension, and <italic>d</italic><sub><italic>h</italic></sub>=<italic>d/H</italic> is the dimension of each attention head. 
The attention scores are computed using scaled dot-product attention:</p><disp-formula id="E11"><label>(11)</label><mml:math id="eqn11"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>Q</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:msubsup><mml:mi>K</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:msqrt><mml:msub><mml:mi>d</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:msqrt></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The scaling factor <inline-formula><mml:math id="ieqn9"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mn>1</mml:mn><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:msqrt><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:msqrt></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is critical for maintaining gradient stability. Without scaling, the dot-product values increase with feature dimensionality, which may push the SoftMax function into regions with extremely small gradients. 
The computation for each attention head is defined as</p><disp-formula id="E12"><label>(12)</label><mml:math id="eqn12"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">h</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>Q</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>K</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>V</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">f</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">M</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>Q</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:msubsup><mml:mi>K</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:msqrt><mml:msub><mml:mi>d</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:msqrt></mml:mfrac><mml:mo>)</mml:mo></mml:mrow><mml:msub><mml:mi>V</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>After concatenating the outputs of all attention heads, a linear transformation is 
applied to obtain the multihead cross-attention output:</p><disp-formula id="E13"><label>(13)</label><mml:math id="eqn13"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtext>MultiHeadCrossAttn</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mtext>Concat</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">h</mml:mi><mml:mi mathvariant="bold">e</mml:mi><mml:mi mathvariant="bold">a</mml:mi><mml:mi mathvariant="bold">d</mml:mi></mml:mrow><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">h</mml:mi><mml:mi mathvariant="bold">e</mml:mi><mml:mi mathvariant="bold">a</mml:mi><mml:mi mathvariant="bold">d</mml:mi></mml:mrow><mml:mi>H</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>W</mml:mi><mml:mi>O</mml:mi></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <bold>W</bold><italic><sub>O</sub></italic> denotes the output projection matrix. Through the multihead mechanism, the model can learn distinct attention patterns in different representation subspaces, allowing it to attend to diverse types of feature relationships simultaneously and thereby enhancing its expressive capacity.</p></sec><sec id="s2-3-3-3-2"><title>Stacked Transformer Encoder Architecture</title><p>To capture the hierarchical patterns of disease progression, we use a stack of <italic>L</italic> transformer encoder layers. 
Each layer builds upon the representations from the previous one, thereby progressively modeling more complex temporal dependencies. Each layer <italic>l</italic> &#x2208; {1,...,<italic>L</italic>} consists of 2 sublayers with residual connections. Specifically, Equation 14 corresponds to the bitemporal directed cross-attention mechanism sublayer, while Equation 15 represents the positionwise feed-forward network (FFN) sublayer:</p><disp-formula id="E14"><label>(14)</label><mml:math id="eqn14"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msup><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">z</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>l</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mtext>LayerNorm</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi mathvariant="bold">z</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:mtext>Dropout</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtext>MultiHeadCrossAttn</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msup><mml:mrow><mml:mi mathvariant="bold">z</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo 
stretchy="false">)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E15"><label>(15)</label><mml:math id="eqn15"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msup><mml:mrow><mml:mi mathvariant="bold">z</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>l</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">y</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">N</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">m</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">z</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>l</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">D</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow><mml:mrow><mml:mo maxsize="1.2em" minsize="1.2em">(</mml:mo></mml:mrow><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">z</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mi>l</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo maxsize="1.2em" minsize="1.2em">)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Here, the positionwise FFN uses the ReLU activation function and incorporates dropout to prevent overfitting, defined as</p><disp-formula id="E16"><label>(16)</label><mml:math id="eqn16"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">U</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <bold>W</bold><sub>1</sub> is the weight matrix of the first feed-forward layer, which expands the feature dimension by a factor of 4 to enhance nonlinear representational capacity. <bold>W</bold><sub>2</sub> is the weight matrix of the second feed-forward layer, which projects the expanded dimension back to the original size <italic>d</italic>. 
<bold>b</bold><sub>1</sub> and <bold>b</bold><sub>2</sub> denote the bias vectors for the first and second layers, respectively. After passing through <italic>L</italic> stacked layers, we obtain the final fused representation <bold>z</bold><sup>(</sup><italic><sup>L</sup></italic><sup>)</sup>, which simultaneously encodes early and later temporal information of patients and is further enriched through direct cross-time feature interactions.</p></sec></sec></sec></sec><sec id="s2-4"><title>Classifier Layer</title><p>The fused features <bold>z</bold><sup>(</sup><italic><sup>L</sup></italic><sup>)</sup> output by the transformer encoder are fed into a 2-layer fully connected neural network as the classifier. This classifier consists of a linear layer, a ReLU activation function, a dropout layer, and a final linear output layer, which maps the fused features into probability distributions over multimorbidity patterns.</p><sec id="s2-4-1"><title>First Layer: Linear Mapping and Nonlinear Activation</title><p>The first layer of the classifier consists of a linear mapping followed by a nonlinear activation function. This step is crucial for transforming the fused features into a more complex representation. 
The output of this layer is computed as follows:</p><disp-formula id="E17"><label>(17)</label><mml:math id="eqn17"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="bold">o</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">U</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi mathvariant="bold">z</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>L</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi>b</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn10"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mi mathvariant="bold">W</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn11"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mi 
mathvariant="bold">b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are the weight matrix and bias vector of the first layer, respectively, and ReLU denotes the activation function. A dropout regularization is applied after this layer to reduce the risk of overfitting.</p></sec><sec id="s2-4-2"><title>Second Layer: Output Mapping</title><p>The second layer of the classifier maps the transformed features to output logits, representing the class scores. This layer computes the logits as follows:</p><disp-formula id="E18"><label>(18)</label><mml:math id="eqn18"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="bold">y</mml:mi></mml:mrow><mml:mrow><mml:mtext>logits</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="bold">o</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi>b</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn12"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mi mathvariant="bold">W</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>C</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn13"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mi mathvariant="bold">b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represent the weight matrix and bias vector of the second layer, respectively, and <italic>C</italic> denotes the number of output classes. 
The final prediction is obtained by applying the SoftMax function to <bold>y</bold><sub>logits</sub>, which transforms the logits into class probabilities:</p><disp-formula id="E19"><label>(19)</label><mml:math id="eqn19"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">f</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">M</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>logits</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>logits</mml:mtext><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:msup><mml:mi>c</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>logits</mml:mtext><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msup><mml:mi>c</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula></sec></sec><sec id="s2-5"><title>Loss Function</title><p>We trained the model using the standard cross-entropy loss 
function for multiclass, single-label classification. Based on the predicted probability distribution <inline-formula><mml:math id="ieqn14"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> output by the classifier, the cross-entropy loss is defined as</p><disp-formula id="E20"><label>(20)</label><mml:math id="eqn20"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mi>log</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>C</italic> denotes the number of classes, <inline-formula><mml:math id="ieqn15"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the one-hot encoded true label of the <italic>i</italic>th sample for class <italic>c</italic>, and <inline-formula><mml:math id="ieqn16"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the predicted probability that the <italic>i</italic>th sample belongs to class <italic>c</italic>. The objective is to minimize the average loss across all training samples. Cross-entropy is particularly suitable for our multiclass single-label task because it encourages the network to assign high probability to the correct class while penalizing confident but incorrect predictions. Since the dataset has a relatively balanced class distribution, no class weighting was applied in the loss function. The use of SoftMax outputs with cross-entropy is equivalent to maximizing the likelihood of the correct class and is a standard choice for neural network classifiers.</p><p>Algorithm 1 in <xref ref-type="other" rid="box1">Textbox 1</xref> presents the pseudocode of the CLA-Net model. 
The algorithm details how each component is sequentially executed on each batch of data.</p><boxed-text id="box1"><title>Algorithm 1: CLA-Net (Cross-Lag Attention Network) model.</title><p><bold>Input</bold>:</p><p><inline-formula><mml:math id="ieqn17"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>: Features at time <italic>t</italic> (continuous features <inline-formula><mml:math id="ieqn18"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, discrete features <inline-formula><mml:math id="ieqn19"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>c</mml:mi></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>)</p><p><italic>x<sub>t</sub></italic><sub>&#x2212;1</sub>: Features at time <italic>t</italic>&#x2212;1 (continuous features <inline-formula><mml:math id="ieqn20"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, discrete features <inline-formula><mml:math id="ieqn21"><mml:mstyle><mml:mrow><mml:mstyle 
displaystyle="false"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>c</mml:mi></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>)</p><p>y: Label (5-class classification)</p><p><bold>Output</bold>: Predicted probability distribution <bold>p</bold>, cross-entropy loss <italic>L</italic></p><p>Process:</p><p>1: // <italic>1. Feature preprocessing and embedding</italic></p><p>2: <inline-formula><mml:math id="ieqn22"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">E</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">c</mml:mi></mml:mrow></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Embedding of discrete features</italic></p><p>3: <inline-formula><mml:math id="ieqn23"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">E</mml:mi><mml:mi 
mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">c</mml:mi></mml:mrow></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>4: <inline-formula><mml:math id="ieqn24"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">y</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">N</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">m</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Normalization of continuous features</italic></p><p>5: <inline-formula><mml:math 
id="ieqn25"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">y</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">N</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">m</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>6:<inline-formula><mml:math id="ieqn26"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">r</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Concatenation of features at time t&#x2212;1</italic></p><p>7: <inline-formula><mml:math id="ieqn27"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">r</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Concatenation of features at time t</italic></p><p>8: <inline-formula><mml:math id="ieqn28"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo 
stretchy="false">&#x2190;</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>9: <inline-formula><mml:math id="ieqn29"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>10: <italic>// 2. 
GRU-based temporal encoding</italic></p><p>11:<inline-formula><mml:math id="ieqn30"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">U</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Hidden state at time t&#x2212;1</italic></p><p>12: <inline-formula><mml:math id="ieqn31"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">U</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Hidden state at time t</italic></p><p>13: <italic>// 3. 
Transformer encoder based on the bitemporal directed cross-attention mechanism</italic></p><p>14: for layer <italic>l</italic> = 1, &#x2026;, <italic>L</italic> do</p><p>15: for head <inline-formula><mml:math id="ieqn32"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> do</p><p>16: <inline-formula><mml:math id="ieqn33"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>Q</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:msup><mml:mi>K</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:msup><mml:mi>V</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>17: <inline-formula><mml:math id="ieqn34"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">f</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:msup><mml:mi>Q</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>K</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mi mathvariant="normal">&#x22A4;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:msqrt><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:msqrt></mml:mfrac><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Attention weights</italic></p><p>18: <inline-formula><mml:math id="ieqn35"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mrow><mml:mi mathvariant="normal">h</mml:mi><mml:mi 
mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msup><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:msup><mml:mi>V</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Context vector</italic></p><p>19: end for</p><p>20: <inline-formula><mml:math id="ieqn36"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>Z</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi mathvariant="normal">h</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="normal">h</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>H</mml:mi><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>21: <inline-formula><mml:math id="ieqn37"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>U</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">y</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">N</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">m</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">D</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>Z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Residual connection and normalization</italic></p><p>22: <inline-formula><mml:math id="ieqn38"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">y</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">N</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi 
mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">m</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>U</mml:mi><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">D</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>U</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>23: end for</p><p>24: <inline-formula><mml:math id="ieqn39"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mtext>Let&#x00A0;</mml:mtext><mml:mi>z</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula><italic>// final fused representation</italic></p><p>25: <italic>// 4. 
Classification and prediction</italic></p><p>26: <inline-formula><mml:math id="ieqn40"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>o</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p><p>27: <inline-formula><mml:math id="ieqn41"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>p</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mrow><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">f</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mi>o</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula><italic>// Output probability distribution</italic></p><p>28: <inline-formula><mml:math id="ieqn42"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mstyle><mml:mi>L</mml:mi><mml:mo 
stretchy="false">&#x2190;</mml:mo><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:munderover><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> <italic>// Cross-entropy loss</italic></p><p>29: return <bold>p</bold>, <italic>L</italic></p></boxed-text></sec><sec id="s2-6"><title>Training Configuration and Implementation Details</title><p>All experiments were conducted using PyTorch v1.7.0 (Facebook AI Research, Meta Platforms) and Python 3.8 (Python Software Foundation) on Ubuntu 18.04 (Canonical Ltd), with an NVIDIA RTX 4090 GPU (24 GB memory) and 32 GB RAM (CUDA 11.0). The proposed model and all baselines were trained under identical configurations and hyperparameters (<xref ref-type="table" rid="table1">Table 1</xref>). The main settings were 60 training epochs, mini-batch size of 64, embedding and GRU hidden dimensions set to 128, Adam optimizer with a learning rate of 0.0001 (determined via grid search), cross-entropy loss, and a dropout rate of 0.3 applied to both transformer layers and the classifier. Model weights were initialized using PyTorch defaults. During data preprocessing, feature normalization was fitted only on the training set and then applied to the validation and test sets. The dataset was partitioned into 80% training and 20% testing sets using a subject-wise splitting strategy based on unique individual identifiers. This approach ensured that all longitudinal observations from the same participant were assigned exclusively to a single subset, thereby preventing potential data leakage. From the training set, 20% of the individuals were further held out as a validation set. 
During training, the macro-<italic>F</italic><sub>1</sub>-score on the validation set was used as the criterion for early stopping and model selection. Only the best-performing model on the validation set was retained, and a final evaluation was conducted on the independent test set.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Training hyperparameters.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Parameters or hyperparameters</td><td align="left" valign="bottom">Value</td></tr></thead><tbody><tr><td align="left" valign="top">Epoch</td><td align="left" valign="top">60</td></tr><tr><td align="left" valign="top">Learning rate</td><td align="left" valign="top">0.0001</td></tr><tr><td align="left" valign="top">Dropout</td><td align="left" valign="top">0.3</td></tr><tr><td align="left" valign="top">Random seed</td><td align="left" valign="top">42</td></tr><tr><td align="left" valign="top">Batch size</td><td align="left" valign="top">64</td></tr><tr><td align="left" valign="top">Hidden dimension</td><td align="left" valign="top">128</td></tr></tbody></table></table-wrap></sec><sec id="s2-7"><title>Evaluation Metrics</title><p>To evaluate the performance of the model in the multiclass classification task, we adopted widely used metrics, including accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score, and reported their macroaveraged values. Macroaveraging assigns equal weight to each class, thereby avoiding the bias of microaveraging, which tends to favor majority classes in imbalanced datasets. This provides a more comprehensive and balanced assessment of the model&#x2019;s ability to identify all 5 multimorbidity patterns. 
The formulas for these evaluation metrics are as follows:</p><disp-formula id="E21"><label>(21)</label><mml:math id="eqn21"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">y</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mstyle displaystyle="true" scriptlevel="0"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:mi>T</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mstyle><mml:mstyle displaystyle="true" scriptlevel="0"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E22"><label>(22)</label><mml:math id="eqn22"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi 
mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E23"><label>(23)</label><mml:math id="eqn23"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E24"><label>(24)</label><mml:math id="eqn24"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>TP<sub>c</sub></italic>, <italic>FP<sub>c</sub></italic>, <italic>FN<sub>c</sub></italic>, and <italic>TN<sub>c</sub></italic> denote the true positives, false 
positives, false negatives, and true negatives for class <italic>c</italic>, respectively. Accuracy represents the proportion of correctly predicted samples to the total number of samples. Precision measures the proportion of samples predicted as a given class that actually belong to that class. Recall indicates the proportion of samples that are correctly identified among those that truly belong to a given class. The <italic>F</italic><sub>1</sub>-score, defined as the harmonic mean of precision and recall, provides a balanced measure of the model&#x2019;s classification performance for a given class. Macroaveraging is obtained by taking the arithmetic mean of the metric values across all classes. The formulas are defined as follows:</p><disp-formula id="E25"><label>(25)</label><mml:math id="eqn25"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="right left right left right left right left right left right left" rowspacing="0.9em 0.9em 0.3em" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="normal">M</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi 
mathvariant="normal">n</mml:mi></mml:mrow></mml:mtd><mml:mtd><mml:mi/><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>C</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="normal">M</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow></mml:mtd><mml:mtd><mml:mi/><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>C</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi 
mathvariant="normal">l</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="normal">M</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi mathvariant="normal">F</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:mtd><mml:mtd><mml:mi/><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>C</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi mathvariant="normal">F</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"/></mml:mrow></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>In addition, we further introduced the receiver operating characteristic (ROC) curve and the area under the curve (AUC) as complementary evaluation metrics. While accuracy focuses on the correctness of class predictions, AUC assesses the ranking quality of predicted probabilities. Particularly in multiclass problems, a higher macroaveraged AUC together with higher accuracy provides stronger evidence that the model can achieve good discrimination across all classes, rather than merely benefiting from class size distributions. 
The ROC curve is plotted with the false positive rate (FPR) on the <italic>x</italic>-axis and the true positive rate (TPR) on the <italic>y</italic>-axis and is mathematically defined as:</p><disp-formula id="E26"><label>(26)</label><mml:math id="eqn26"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>AUC measures the overall discriminative ability of a model, with values ranging from 0 to 1, where values closer to 1 indicate stronger separation between positive and negative classes. Commonly, AUC &#x003E;0.9 is considered excellent, 0.8&#x2010;0.9 good, 0.7&#x2010;0.8 acceptable, and &#x003C;0.7 poor. For multiclass prediction, we adopted a one-vs-rest strategy, treating each class as positive against all others to compute class-specific ROC curves and AUC values. To validate the effectiveness of the proposed model, we compared it with multiple baselines under identical test conditions and reported AUC as a key performance metric. 
The evaluation procedures followed recommended practices for multiclass classification to ensure reliable assessment of predictive performance.</p><p>In addition to AUC-ROC, we also reported AU-PRC (area under the precision-recall curve), which provides a complementary evaluation of model performance by emphasizing the trade-off between precision and recall. AU-PRC is particularly informative in settings with class imbalance, as it focuses on the model&#x2019;s ability to correctly identify positive instances without being overly influenced by true negatives. Similar to the ROC-based evaluation, AU-PRC was computed using a one-vs-rest strategy for multiclass prediction, and class-specific precision-recall (PR) curves were aggregated to assess overall performance.</p></sec><sec id="s2-8"><title>Baseline Models</title><sec id="s2-8-1"><title>Logistic Regression</title><p>A linear classification model that estimates class probabilities through a logistic function, serving as a simple and interpretable baseline to assess the benefit of nonlinear feature interactions [<xref ref-type="bibr" rid="ref34">34</xref>].</p></sec><sec id="s2-8-2"><title>Support Vector Machine</title><p>A margin-based classifier that constructs an optimal separating hyperplane in the feature space, enabling the evaluation of nonlinear decision boundaries through kernel-based learning [<xref ref-type="bibr" rid="ref34">34</xref>].</p></sec><sec id="s2-8-3"><title>Random Forest</title><p>An ensemble learning method that aggregates multiple decision trees trained on bootstrapped samples, capturing nonlinear feature relationships and improving robustness against overfitting [<xref ref-type="bibr" rid="ref35">35</xref>].</p></sec><sec id="s2-8-4"><title>XGBoost</title><p>A gradient boosting framework that builds additive tree-based models in a stagewise manner, designed to optimize predictive performance by modeling complex nonlinear interactions among features [<xref ref-type="bibr" 
rid="ref36">36</xref>].</p></sec><sec id="s2-8-5"><title>Convolutional Neural Network</title><p>Extracts local temporal features via convolutional kernels without explicitly modeling temporal order, serving to test the importance of temporal structure [<xref ref-type="bibr" rid="ref37">37</xref>].</p></sec><sec id="s2-8-6"><title>Long Short-Term Memory</title><p>Captures long-term dependencies through gating mechanisms, used to compare different recurrent units for short-sequence modeling [<xref ref-type="bibr" rid="ref38">38</xref>].</p></sec><sec id="s2-8-7"><title>Transformer</title><p>Models relationships across time steps using self-attention, with concatenated features fed into a standard transformer encoder [<xref ref-type="bibr" rid="ref39">39</xref>].</p></sec><sec id="s2-8-8"><title>PatchTST</title><p>Reduces computational complexity through a &#x201C;patching&#x201D; strategy, improving efficiency for long-sequence prediction [<xref ref-type="bibr" rid="ref40">40</xref>].</p></sec><sec id="s2-8-9"><title>iTransformer</title><p>Applies attention along the feature dimension rather than the temporal dimension, better capturing inter-variable relationships in multivariate time series [<xref ref-type="bibr" rid="ref41">41</xref>].</p></sec><sec id="s2-8-10"><title>Mamba</title><p>Mamba uses a selective state space mechanism to model long-term dependencies with linear complexity [<xref ref-type="bibr" rid="ref42">42</xref>].</p></sec><sec id="s2-8-11"><title>MambaTS</title><p>An enhanced variant of Mamba that incorporates improved state space mechanisms to further boost long-sequence prediction performance [<xref ref-type="bibr" rid="ref43">43</xref>].</p></sec><sec id="s2-8-12"><title>Long Short-Term Memory+Transformer</title><p>A variant of CLA-Net that replaces the GRU encoder with a long short-term memory (LSTM), used to compare the effects of different recurrent units.</p><p>To ensure a fair comparison with the deep learning models, the input 
configuration for all traditional machine learning baselines (logistic regression, support vector machine, random forest, and XGBoost) was aligned with the dual-time-point setup of CLA-Net. Specifically, the feature vectors from the historical time point (<inline-formula><mml:math id="ieqn43"><mml:msub><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>) and the current time point (<inline-formula><mml:math id="ieqn44"><mml:msub><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>) were concatenated to form a single flattened input vector [<inline-formula><mml:math id="ieqn45"><mml:msub><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn46"><mml:msub><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>]. This ensures that all models use the exact same longitudinal information, preventing any performance bias due to information asymmetry.</p></sec></sec><sec id="s2-9"><title>Ethical Considerations</title><p>Ethical approval for the original data collection in the CHARLS project was granted by the Biomedical Ethics Review Committee of Peking University (approval number: IRB00001052-11015). All participants provided written informed consent at the time of enrollment. As this study involves a secondary analysis of publicly available, deidentified data, the requirement for additional informed consent was waived. To ensure privacy and confidentiality, all direct identifiers were removed from the dataset by the CHARLS team prior to public release. 
Furthermore, this paper and its supplementary materials do not contain any images or information that could identify individual participants.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Dataset</title><p>This study adopted a longitudinal cohort design using publicly available data from the CHARLS. CHARLS is a nationally representative longitudinal survey covering 28 provinces, 150 counties and districts, and 450 communities, and provides extensive demographic, chronic disease, and other health-related information. Using 2011 as the baseline, we included respondents with at least 2 chronic conditions and complete demographic data who participated in all 5 waves, yielding 3644 individuals and 18,220 person-wave observations. The sample selection process is shown in <xref ref-type="fig" rid="figure3">Figure 3</xref>. Attrition during follow-up occurred due to both mortality and nonmortality loss to follow-up. Specifically, the without-follow-up observations (30,483 person-wave observations) corresponded to 1617 person-wave observations attributable to death and 28,866 person-wave observations attributable to other reasons for nonparticipation.</p><p>Data preprocessing included logical error correction and outlier removal. Regarding missing value treatment, to strictly prevent data leakage, we adhered to a rigorous &#x201C;split-then-impute&#x201D; strategy. The dataset was first partitioned into training and independent test sets. Subsequently, missing values were handled using the MiceForest method for multiple imputation [<xref ref-type="bibr" rid="ref44">44</xref>]. Crucially, the imputation model was fitted exclusively on the training set. The MiceForest method is based on random forests and predictive mean matching, allowing it to capture nonlinear relationships and improve imputation accuracy [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. 
To ensure data quality, variables with more than 50% missingness were excluded, and only those with relatively low missingness were retained [<xref ref-type="bibr" rid="ref45">45</xref>].</p><p>The final dataset variables are summarized in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and include (1) individual identifiers and survey time (ID and year); (2) 14 chronic disease variables used for LTA-based identification and transition modeling of multimorbidity patterns, which also served as target variables in the prediction task; and (3) symptom variables along with demographic, behavioral, psychological, socioeconomic, and macropolicy features, which were used as input features for the prediction model. A final cohort of 3644 individuals with multimorbidity who maintained participation across all 5 survey waves was included in the analysis. <xref ref-type="table" rid="table2">Table 2</xref> presents the baseline sociodemographic characteristics of the participants (recorded in 2011). The cohort was predominantly female (n=2132, 58.51%) and comprised mainly middle-aged and older adults, with 93.33% (n=3401) of participants aged between 45 and 74 years. In terms of socioeconomic status, the sample was characterized by relatively low educational attainment, as 90.12% (n=3284) of participants had an education level below junior high school. The majority were married (n=3283, 90.09%) and resided in rural areas (n=2306, 63.28%), while less than half (n=1550, 42.54%) were covered by pension insurance.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Participant selection flowchart. 
CHARLS: China Health and Retirement Longitudinal Study.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig03.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Demographic characteristics.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variables</td><td align="left" valign="bottom">Values</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Gender, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Men</td><td align="left" valign="top">1512 (41.49)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Women</td><td align="left" valign="top">2132 (58.51)</td></tr><tr><td align="left" valign="top" colspan="2">Age (years), n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003C;18</td><td align="left" valign="top">1 (0.03)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>18&#x2010;44</td><td align="left" valign="top">65 (1.78)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>45&#x2010;59</td><td align="left" valign="top">1881 (51.62)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>60&#x2010;74</td><td align="left" valign="top">1520 (41.71)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2265;75</td><td align="left" valign="top">177 (4.86)</td></tr><tr><td align="left" valign="top" colspan="2">Education 
level, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Below junior high school</td><td align="left" valign="top">3284 (90.12)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High school or vocational training</td><td align="left" valign="top">313 (8.59)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Higher education</td><td align="left" valign="top">47 (1.29)</td></tr><tr><td align="left" valign="top" colspan="2">Marital status, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Unmarried</td><td align="left" valign="top">361 (9.91)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Married</td><td align="left" valign="top">3283 (90.09)</td></tr><tr><td align="left" valign="top" colspan="2">Residence, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rural</td><td align="left" valign="top">2306 (63.28)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Urban</td><td align="left" valign="top">1338 (36.72)</td></tr><tr><td align="left" valign="top" colspan="2">Pension coverage, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">2094 (57.46)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes</td><td align="left" valign="top">1550 (42.54)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>LTA-Based 
Identification of Multimorbidity Patterns</title><p>To clarify the consistency and heterogeneity of multimorbidity pattern structures across different time points, we first conducted LCA at each wave to provide the basis for subsequent LTA. The model fit results (<xref ref-type="table" rid="table3">Table 3</xref>) indicated that the 5-class solution performed best across multiple criteria: at T1, T2, T3, and T5, it achieved the lowest AIC and SaBIC values with good classification quality (entropy=0.838&#x2010;0.926); at T4, although the 4-class and 5-class solutions showed comparable entropy, the 5-class model outperformed in AIC, BIC, and SaBIC. The BLRT consistently supported the 5-class solution across all time points (all <italic>P</italic>&#x003C;.001). Moreover, the class distributions of the 5-class solution were relatively stable and clinically plausible across waves. Taken together, the 5-class model was selected as the optimal solution for the LTA.</p><p>After establishing the 5-class structure at each time point, we tested measurement invariance to ensure the validity of cross-time comparisons. As shown in <xref ref-type="table" rid="table4">Table 4</xref>, constraining item-response probabilities to be equal across time did not significantly worsen model fit compared to the freely estimated model (&#x2206;<italic>&#x03C7;</italic>&#x00B2;<sub>75</sub>=88.7; <italic>P</italic>=.14). 
This nonsignificant likelihood ratio test confirmed measurement invariance, indicating that the latent classes maintained consistent meanings across all 5 waves.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Latent class analysis results for multimorbidity patterns at 5 time points based on the CHARLS (China Health and Retirement Longitudinal Study) database.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Time and class</td><td align="left" valign="bottom">Percent (%)</td><td align="left" valign="bottom">AIC<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="bottom">BIC<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="bottom">SaBIC<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="bottom">Entropy</td><td align="left" valign="bottom">BLRT<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="8">T1</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">2</td><td align="left" valign="top">40.5/59.5</td><td align="left" valign="top">4196.03</td><td align="left" valign="top">4375.85</td><td align="left" valign="top">4283.70</td><td align="left" valign="top">0.720</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">3</td><td align="left" valign="top">47.8/16.5/35.8</td><td align="left" valign="top">3598.93</td><td align="left" valign="top">3871.77</td><td align="left" valign="top">3731.96</td><td align="left" valign="top">0.802</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">4</td><td align="left" valign="top">21.4/15.6/34.8/28.2</td><td align="left" 
valign="top">3378.95</td><td align="left" valign="top">3744.80</td><td align="left" valign="top">3557.32</td><td align="left" valign="top">0.839</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">5</td><td align="left" valign="top">11.6/15.3/39.8/9.4/23.9</td><td align="left" valign="top">3230.96</td><td align="left" valign="top">3782.83</td><td align="left" valign="top">3500.03</td><td align="left" valign="top">0.926</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top">17.1/15.0/32.5/19.4/14.4/1.6</td><td align="left" valign="top">3293.62</td><td align="left" valign="top">3752.49</td><td align="left" valign="top">3517.35</td><td align="left" valign="top">0.856</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top" colspan="8">T2</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">2</td><td align="left" valign="top">37.7/62.3</td><td align="left" valign="top">4083.43</td><td align="left" valign="top">4263.25</td><td align="left" valign="top">4171.11</td><td align="left" valign="top">0.711</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">3</td><td align="left" valign="top">46.0/15.3/38.6</td><td align="left" valign="top">3498.78</td><td align="left" valign="top">3771.62</td><td align="left" valign="top">3631.81</td><td align="left" valign="top">0.803</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">4</td><td align="left" valign="top">10.7/15.1/40.3/33.8</td><td align="left" valign="top">3285.65</td><td align="left" valign="top">3651.50</td><td align="left" valign="top">3464.03</td><td align="left" valign="top">0.860</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" 
valign="top"/><td align="left" valign="top">5</td><td align="left" valign="top">11.1/13.6/39.5/2.1/33.7</td><td align="left" valign="top">3197.59</td><td align="left" valign="top">3656.45</td><td align="left" valign="top">3421.32</td><td align="left" valign="top">0.889</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top">14.7/12.8/34.9/7.4/27.6/2.6</td><td align="left" valign="top">3167.49</td><td align="left" valign="top">3719.36</td><td align="left" valign="top">3436.56</td><td align="left" valign="top">0.878</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top" colspan="8">T3</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">2</td><td align="left" valign="top">36.6/63.4</td><td align="left" valign="top">4199.14</td><td align="left" valign="top">4378.97</td><td align="left" valign="top">4286.82</td><td align="left" valign="top">0.701</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">3</td><td align="left" valign="top">39.5/12.7/47.8</td><td align="left" valign="top">3603.58</td><td align="left" valign="top">3876.42</td><td align="left" valign="top">3736.61</td><td align="left" valign="top">0.800</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">4</td><td align="left" valign="top">20.5/12.0/35.6/32.0</td><td align="left" valign="top">3446.95</td><td align="left" valign="top">3812.80</td><td align="left" valign="top">3625.33</td><td align="left" valign="top">0.802</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">5</td><td align="left" valign="top">5.0/9.6/33.0/24.1/28.3</td><td align="left" valign="top">3238.82</td><td align="left" valign="top">3697.68</td><td align="left" 
valign="top">3462.54</td><td align="left" valign="top">0.838</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top">24.8/10.2/30.1/16.8/13.6/4.6</td><td align="left" valign="top">3218.26</td><td align="left" valign="top">3770.13</td><td align="left" valign="top">3487.33</td><td align="left" valign="top">0.884</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top" colspan="8">T4</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">2</td><td align="left" valign="top">41.3/58.7</td><td align="left" valign="top">5174.42</td><td align="left" valign="top">5354.24</td><td align="left" valign="top">5262.09</td><td align="left" valign="top">0.687</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">3</td><td align="left" valign="top">18.4/42.2/39.4</td><td align="left" valign="top">4448.84</td><td align="left" valign="top">4721.68</td><td align="left" valign="top">4581.87</td><td align="left" valign="top">0.798</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">4</td><td align="left" valign="top">11.2/44.0/35.8/9.0</td><td align="left" valign="top">4034.00</td><td align="left" valign="top">4399.85</td><td align="left" valign="top">4212.37</td><td align="left" valign="top">0.849</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">5</td><td align="left" valign="top">8.7/9.8/29.9/26.8/24.7</td><td align="left" valign="top">3869.07</td><td align="left" valign="top">4327.93</td><td align="left" valign="top">4092.79</td><td align="left" valign="top">0.842</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" 
valign="top">8.0/9.9/30.8/19.4/27.6/4.2</td><td align="left" valign="top">3832.77</td><td align="left" valign="top">4384.64</td><td align="left" valign="top">4101.84</td><td align="left" valign="top">0.834</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top" colspan="8">T5</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">2</td><td align="left" valign="top">49.0/51.0</td><td align="left" valign="top">5576.80</td><td align="left" valign="top">5756.62</td><td align="left" valign="top">5664.47</td><td align="left" valign="top">0.694</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">3</td><td align="left" valign="top">22.7/35.5/41.8</td><td align="left" valign="top">4771.20</td><td align="left" valign="top">5044.04</td><td align="left" valign="top">4904.23</td><td align="left" valign="top">0.812</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">4</td><td align="left" valign="top">11.8/34.9/43.4/9.9</td><td align="left" valign="top">4272.68</td><td align="left" valign="top">4638.53</td><td align="left" valign="top">4451.05</td><td align="left" valign="top">0.848</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">5</td><td align="left" valign="top">9.8/12.5/32.2/37.2/8.2</td><td align="left" valign="top">4087.68</td><td align="left" valign="top">4639.55</td><td align="left" valign="top">4356.75</td><td align="left" valign="top">0.849</td><td align="left" valign="top">&#x003C;0.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top">13.2/9.0/30.6/34.8/8.8/3.6</td><td align="left" valign="top">4135.51</td><td align="left" valign="top">4594.37</td><td align="left" valign="top">4359.23</td><td align="left" valign="top">0.839</td><td align="left" 
valign="top">&#x003C;0.001</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AIC: Akaike Information Criterion.</p></fn><fn id="table3fn2"><p><sup>b</sup>BIC: Bayesian Information Criterion.</p></fn><fn id="table3fn3"><p><sup>c</sup>SaBIC: sample-size adjusted Bayesian Information Criterion.</p></fn><fn id="table3fn4"><p><sup>d</sup>BLRT: Bootstrap Likelihood Ratio Test.</p></fn><fn id="table3fn5"><p><sup>e</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Likelihood ratio tests for measurement invariance of the latent transition analysis model.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">LogLik<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom">&#x2212;2LL<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="bottom">Chi-square<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup> (<italic>df</italic>)<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">M_free<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="char" char="." valign="top">&#x2212;97100</td><td align="char" char="." valign="top">194,200</td><td align="char" char="." valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">M_invariant<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup></td><td align="char" char="." valign="top">&#x2212;97144.4</td><td align="char" char="." valign="top">194,288.7</td><td align="char" char="." valign="top">88.7 (75)</td><td align="char" char="." 
valign="top">.14</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup> LogLik: log-likelihood.</p></fn><fn id="table4fn2"><p><sup>b</sup>&#x2212;2LL: &#x2212;2 log-likelihood.</p></fn><fn id="table4fn3"><p><sup>c</sup>&#x0394;<italic>&#x03C7;</italic>&#x00B2;: chi-square difference test.</p></fn><fn id="table4fn4"><p><sup>d</sup>&#x0394;df: difference in degrees of freedom.</p></fn><fn id="table4fn5"><p><sup>e</sup>M_free: model with all parameters freely estimated across time.</p></fn><fn id="table4fn6"><p><sup>f</sup>Not applicable.</p></fn><fn id="table4fn7"><p><sup>g</sup>M_invariant: model with item-response probabilities constrained to be equal across time points.</p></fn></table-wrap-foot></table-wrap><p>The LTA clustering results for the 5-class model are shown in <xref ref-type="fig" rid="figure4">Figure 4</xref>. Class 1 (19.8%) had the highest overall disease burden, with nearly universal hypertension (82.7%), accompanied by high prevalence of heart disease (78.1%), dyslipidemia (69.3%), gastritis (68.9%), diabetes (43.9%), arthritis (90.4%), lung disease (44.3%), and stroke (24.1%). This class represented severe multisystem involvement [<xref ref-type="bibr" rid="ref47">47</xref>] and was labeled the &#x201C;Severe Cardiometabolic-Multisystem Pattern.&#x201D;</p><p>Class 2 (22.3%) was characterized by extremely high probabilities of hypertension (99.7%) and arthritis (99.6%). Gastritis showed a moderate prevalence (37.5%), while dyslipidemia (27.3%) and heart disease (26.6%) were present in about one-quarter of individuals. Other conditions were rare (&#x003C;13%), indicating a pattern primarily dominated by hypertension and joint disease. This class was labeled the &#x201C;Hypertension-Arthritis Pattern.&#x201D;</p><p>Class 3 (15.6%) was dominated by lung disease (90.8%), with co-occurrence of arthritis (58.7%), asthma (50.4%), gastritis (43.4%), and hypertension (35.3%). 
This class was labeled the &#x201C;Respiratory-Musculoskeletal Pattern.&#x201D;</p><p>Class 4 (24.1%) exhibited typical metabolic syndrome features, including high prevalence of hypertension (88.3%), dyslipidemia (54.5%), and heart disease (44.4%), with moderate diabetes (36.3%) and gastritis (33.4%). Other diseases were less common (&#x003C;14%), indicating a primarily cardiometabolic profile without extensive multisystem involvement. This class was labeled the &#x201C;Metabolic Syndrome Pattern.&#x201D;</p><p>Class 5 (18.2%) had the lowest overall disease burden, dominated by arthritis (85.2%) and gastritis (76.4%), with all other diseases below 25%. This class was labeled the &#x201C;Gastritis-Arthritis Pattern.&#x201D; The naming of latent multimorbidity patterns was guided by the dominant disease combinations and their clinical interpretation, following conventions widely used in epidemiologic and multimorbidity-cluster studies [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref48">48</xref>-<xref ref-type="bibr" rid="ref52">52</xref>].</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Disease probabilities in the 5 multimorbidity patterns.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig04.png"/></fig></sec><sec id="s3-3"><title>Performance Evaluation Results of CLA-Net</title><p>In the experiments, the proposed method was trained on the training set and evaluated on an independent test set. To ensure robustness, the entire process was repeated 50 times, and the mean and SD of all evaluation metrics across these runs were reported as the final performance results. 
The comparative performance of all models on the multimorbidity pattern prediction task is summarized in <xref ref-type="table" rid="table5">Table 5</xref>, the ROC and precision-recall curves are presented in <xref ref-type="fig" rid="figure5">Figures 5</xref> and <xref ref-type="fig" rid="figure6">6</xref>, and the detailed results of the ablation study are shown in <xref ref-type="table" rid="table6">Table 6</xref>.</p><p>As shown in <xref ref-type="table" rid="table5">Table 5</xref>, CLA-Net consistently outperformed all baseline models in multimorbidity pattern prediction. Traditional machine learning approaches performed relatively poorly, while deep learning models designed for temporal modeling yielded substantial improvements. Among the baselines, Mamba performed best overall, while the convolutional neural network (CNN) performed the worst among the deep learning models, underscoring the importance of temporal feature modeling in longitudinal health data. The hybrid LSTM+transformer model achieved relatively high accuracy, highlighting the benefits of combining sequential modeling with attention mechanisms. Nevertheless, CLA-Net surpassed all baselines across accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score, with low SDs confirming its stability and robustness.</p><p>We further conducted formal statistical significance testing to assess whether the observed performance gains of CLA-Net over baseline models were statistically robust. Specifically, 2-sided Wilcoxon signed-rank tests were applied to paired performance scores obtained from 50 repeated experimental runs for each evaluation metric. To account for multiple comparisons across baseline models and metrics, <italic>P</italic> values were adjusted using the Holm step-down procedure. The Wilcoxon signed-rank test results are given in <xref ref-type="table" rid="table7">Table 7</xref>. The results indicate that CLA-Net achieves statistically significant performance improvements over all baseline models. 
The consistent significance across repeated experiments confirms that the observed gains are robust rather than driven by random fluctuations.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>The results of the performance comparison.<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Types of models and models</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Accuracy</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="5">Machine learning models, mean (SD)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Logistic regression</td><td align="char" char="." valign="top">0.7385 (0.0075)</td><td align="char" char="." valign="top">0.7350 (0.0080)</td><td align="char" char="." valign="top">0.7320 (0.0082)</td><td align="char" char="." valign="top">0.7335 (0.0079)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SVM<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td><td align="char" char="." valign="top">0.7540 (0.0069)</td><td align="char" char="." valign="top">0.7510 (0.0074)</td><td align="char" char="." valign="top">0.7480 (0.0076)</td><td align="char" char="." valign="top">0.7495 (0.0072)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random forest</td><td align="char" char="." valign="top">0.7650 (0.0062)</td><td align="char" char="." valign="top">0.7620 (0.0068)</td><td align="char" char="." valign="top">0.7590 (0.0070)</td><td align="char" char="." 
valign="top">0.7605 (0.0066)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="char" char="." valign="top">0.7856 (0.0059)</td><td align="char" char="." valign="top">0.7820 (0.0062)</td><td align="char" char="." valign="top">0.7835 (0.0060)</td><td align="char" char="." valign="top">0.7892 (0.0052)</td></tr><tr><td align="left" valign="top" colspan="5">Deep learning models, mean (SD)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CNN<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></td><td align="char" char="." valign="top">0.7835 (0.0090)</td><td align="char" char="." valign="top">0.7779 (0.0084)</td><td align="char" char="." valign="top">0.7807 (0.0086)</td><td align="char" char="." valign="top">0.7860 (0.0082)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LSTM<sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></td><td align="char" char="." valign="top">0.8020 (0.0063)</td><td align="char" char="." valign="top">0.8040 (0.0070)</td><td align="char" char="." valign="top">0.8030 (0.0066)</td><td align="char" char="." valign="top">0.8041 (0.0069)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transformer</td><td align="char" char="." valign="top">0.8138 (0.0079)</td><td align="char" char="." valign="top">0.8074 (0.0076)</td><td align="char" char="." valign="top">0.8106 (0.0072)</td><td align="char" char="." valign="top">0.8120 (0.0069)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>iTransformer</td><td align="char" char="." valign="top">0.8167 (0.0074)</td><td align="char" char="." 
valign="top">0.8189 (0.0078)</td><td align="char" char="." valign="top">0.8178 (0.0071)</td><td align="char" char="." valign="top">0.8184 (0.0070)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PatchTST</td><td align="char" char="." valign="top">0.8092 (0.0082)</td><td align="char" char="." valign="top">0.8057 (0.0088)</td><td align="char" char="." valign="top">0.8074 (0.0081)</td><td align="char" char="." valign="top">0.8088 (0.0083)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MambaTS</td><td align="char" char="." valign="top">0.8191 (0.0061)</td><td align="char" char="." valign="top">0.8214 (0.0067)</td><td align="char" char="." valign="top">0.8202 (0.0062)</td><td align="char" char="." valign="top">0.8210 (0.0064)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mamba</td><td align="char" char="." valign="top">0.8226 (0.0062)</td><td align="char" char="." valign="top">0.8258 (0.0068)</td><td align="char" char="." valign="top">0.8242 (0.0061)</td><td align="char" char="." valign="top">0.8242 (0.0064)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LSTM + Transformer</td><td align="char" char="." valign="top">0.8234 (0.0060)</td><td align="char" char="." valign="top">0.8256 (0.0065)</td><td align="char" char="." valign="top">0.8245 (0.0062)</td><td align="char" char="." valign="top">0.8247 (0.0058)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CLA-Net (our model)</td><td align="char" char="." valign="top"><italic>0.8326 (0.0053)</italic></td><td align="char" char="." valign="top"><italic>0.8312 (0.0056)</italic></td><td align="char" char="." 
valign="top"><italic>0.8319 (0.0051)</italic></td><td align="char" char="." valign="top"><italic>0.8352 (0.0048)</italic></td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>The italicized values represent the best performance among the compared models on each evaluation metric. These italicized values are used to highlight the most outstanding results among the different models.</p></fn><fn id="table5fn2"><p><sup>b</sup>SVM: support vector machine.</p></fn><fn id="table5fn3"><p><sup>c</sup>CNN: convolutional neural network.</p></fn><fn id="table5fn4"><p><sup>d</sup>LSTM: long short-term memory.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>The area under the receiver operating characteristic curve of the Cross-Lag Attention Network and baseline models. AUC: area under the curve; CLA-Net: Cross-Lag Attention Network; CNN: convolutional neural network; LSTM: long short-term memory; SVM: support vector machine.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig05.png"/></fig><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>The area under the precision-recall curve of the Cross-Lag Attention Network and baseline models. 
AU-PRC: area under the precision-recall curve; CLA-Net: Cross-Lag Attention Network; CNN: convolutional neural network; LSTM: long short-term memory; SVM: support vector machine.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig06.png"/></fig><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Results of the ablation study.<sup><xref ref-type="table-fn" rid="table6fn1">a</xref></sup></p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Type and ablation setup</td><td align="left" valign="bottom">Precision, mean (SD)</td><td align="left" valign="bottom">Recall, mean (SD)</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score, mean (SD)</td><td align="left" valign="bottom">Accuracy, mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="6">Full model</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">CLA-Net<sup><xref ref-type="table-fn" rid="table6fn2">b</xref></sup> (our model)</td><td align="char" char="plusmn" valign="top"><italic>0.8326 (0.0053)</italic></td><td align="char" char="plusmn" valign="top"><italic>0.8312 (0.0056)</italic></td><td align="char" char="plusmn" valign="top"><italic>0.8319 (0.0051)</italic></td><td align="char" char="plusmn" valign="top"><italic>0.8352 (0.0048)</italic></td></tr><tr><td align="left" valign="top" colspan="6">Module ablation</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Transformer branch</td><td align="char" char="plusmn" valign="top">0.8153 (0.0068)</td><td align="char" char="plusmn" valign="top">0.8116 (0.0071)</td><td align="char" char="plusmn" valign="top">0.8134 (0.0069)</td><td align="char" char="plusmn" valign="top">0.8166 (0.0065)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">GRU<sup><xref ref-type="table-fn" 
rid="table6fn3">c</xref></sup> branch</td><td align="char" char="plusmn" valign="top">0.8079 (0.0074)</td><td align="char" char="plusmn" valign="top">0.8067 (0.0078)</td><td align="char" char="plusmn" valign="top">0.8073 (0.0075)</td><td align="char" char="plusmn" valign="top">0.8102 (0.0072)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Replacing the bitemporal directed cross-attention mechanism with self-attention</td><td align="char" char="plusmn" valign="top">0.8218 (0.0059)</td><td align="char" char="plusmn" valign="top">0.8196 (0.0062)</td><td align="char" char="plusmn" valign="top">0.8207 (0.0060)</td><td align="char" char="plusmn" valign="top">0.8244 (0.0057)</td></tr><tr><td align="left" valign="top" colspan="6">Input configuration ablation</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Single-time-point input</td><td align="char" char="plusmn" valign="top">0.7982 (0.0083)</td><td align="char" char="plusmn" valign="top">0.7964 (0.0087)</td><td align="char" char="plusmn" valign="top">0.7973 (0.0085)</td><td align="char" char="plusmn" valign="top">0.7985 (0.0082)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Three-time-point input</td><td align="char" char="plusmn" valign="top"><italic>0.8338 (0.0055)</italic></td><td align="char" char="plusmn" valign="top">0.8265 (0.0058)</td><td align="char" char="plusmn" valign="top">0.8301 (0.0056)</td><td align="char" char="plusmn" valign="top">0.8311 (0.0053)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Four-time-point input</td><td align="char" char="plusmn" valign="top">0.8251 (0.0057)</td><td align="char" char="plusmn" valign="top"><italic>0.8316 (0.0061)</italic></td><td align="char" char="plusmn" valign="top">0.8283 (0.0059)</td><td align="char" char="plusmn" valign="top">0.8287 (0.0056)</td></tr><tr><td align="left" valign="top" colspan="6">Architecture design</td></tr><tr><td align="left" 
valign="top"/><td align="left" valign="top">Reversing the order of transformer and GRU components</td><td align="char" char="plusmn" valign="top">0.8230 (0.0061)</td><td align="char" char="plusmn" valign="top">0.8208 (0.0064)</td><td align="char" char="plusmn" valign="top">0.8219 (0.0062)</td><td align="char" char="plusmn" valign="top">0.8235 (0.0059)</td></tr></tbody></table><table-wrap-foot><fn id="table6fn1"><p><sup>a</sup>The italicized values represent the best performance of each data handling strategy on the evaluation metrics.</p></fn><fn id="table6fn2"><p><sup>b</sup>CLA-Net: Cross-Lag Attention Network.</p></fn><fn id="table6fn3"><p><sup>c</sup>GRU: Gated Recurrent Unit.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Statistical test results.<sup><xref ref-type="table-fn" rid="table7fn1">a</xref></sup></p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Baseline</td><td align="left" valign="bottom">Precision <italic>W</italic></td><td align="left" valign="bottom">Recall <italic>W</italic></td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score <italic>W</italic></td><td align="left" valign="bottom">Accuracy <italic>W</italic></td></tr></thead><tbody><tr><td align="left" valign="top">Logistic regression</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td></tr><tr><td align="left" valign="top">SVM<sup><xref ref-type="table-fn" rid="table7fn2">b</xref></sup></td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td></tr><tr><td align="left" valign="top">Random forest</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" 
valign="top">0***</td></tr><tr><td align="left" valign="top">XGBoost</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td></tr><tr><td align="left" valign="top">CNN<sup><xref ref-type="table-fn" rid="table7fn3">c</xref></sup></td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td></tr><tr><td align="left" valign="top">LSTM<sup><xref ref-type="table-fn" rid="table7fn4">d</xref></sup></td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td></tr><tr><td align="left" valign="top">Transformer</td><td align="left" valign="top">1***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">1***</td></tr><tr><td align="left" valign="top">iTransformer</td><td align="left" valign="top">13***</td><td align="left" valign="top">68***</td><td align="left" valign="top">20***</td><td align="left" valign="top">15***</td></tr><tr><td align="left" valign="top">PatchTST</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td><td align="left" valign="top">0***</td></tr><tr><td align="left" valign="top">MambaTS</td><td align="left" valign="top">27***</td><td align="left" valign="top">82***</td><td align="left" valign="top">39***</td><td align="left" valign="top">0***</td></tr><tr><td align="left" valign="top">Mamba</td><td align="left" valign="top">52***</td><td align="left" valign="top">258***</td><td align="left" valign="top">101***</td><td align="left" valign="top">11***</td></tr><tr><td align="left" valign="top">LSTM+transformer</td><td align="left" valign="top">80***</td><td align="left" valign="top">227***</td><td align="left" valign="top">126***</td><td 
align="left" valign="top">11***</td></tr></tbody></table><table-wrap-foot><fn id="table7fn1"><p><sup>a</sup>Two-sided Wilcoxon signed-rank tests were conducted on paired metric scores obtained from 50 repeated runs (n=50) to compare the Cross-Lag Attention Network against each baseline model. The reported <italic>W</italic> corresponds to the Wilcoxon test statistic. To control for multiple comparisons across 12 baselines and 4 metrics, <italic>P</italic> values were adjusted using the Holm procedure. ***: <italic>P</italic>&#x003C;.001.</p></fn><fn id="table7fn2"><p><sup>b</sup>SVM: support vector machine.</p></fn><fn id="table7fn3"><p><sup>c</sup>CNN: convolutional neural network.</p></fn><fn id="table7fn4"><p><sup>d</sup>LSTM: long short-term memory.</p></fn></table-wrap-foot></table-wrap><p><xref ref-type="fig" rid="figure5">Figure 5</xref> presents the AUC-ROC curves comparing CLA-Net with a range of baseline models. As shown in <xref ref-type="fig" rid="figure5">Figure 5A</xref>, among deep learning approaches, CLA-Net achieves the highest AUC of 0.9293, demonstrating superior discriminative ability in multimorbidity pattern prediction. Models explicitly designed for temporal modeling, such as Mamba (AUC=0.9187), MambaTS (AUC=0.9142), PatchTST (AUC=0.9017), and iTransformer (AUC=0.9016), outperform conventional CNN-based architectures, highlighting the importance of capturing long-range temporal dependencies in longitudinal health data. The hybrid LSTM+transformer model attains an AUC of 0.9106, ranking among the strongest deep learning baselines, while CNN yields the lowest AUC (0.8746) within this group.</p><p>To further assess performance under class imbalance, <xref ref-type="fig" rid="figure6">Figure 6</xref> reports the PR curves and corresponding AUC-PRC values. As shown in <xref ref-type="fig" rid="figure6">Figure 6</xref>, CLA-Net achieves the highest AUC-PRC among deep learning models (0.8885), indicating a superior PR trade-off across recall levels. 
Temporal models such as Mamba, iTransformer, and LSTM+transformer also perform competitively, whereas CNN shows relatively weaker performance. <xref ref-type="fig" rid="figure6">Figure 6</xref> demonstrates that CLA-Net substantially outperforms traditional machine learning baselines, including logistic regression, support vector machine, random forest, and XGBoost. Overall, the PRC results complement the ROC analysis and further confirm the robustness of CLA-Net in identifying future multimorbidity patterns under imbalanced conditions.</p><p>As summarized in <xref ref-type="table" rid="table6">Table 6</xref>, the full CLA-Net consistently outperformed all ablation variants, underscoring the importance of integrating both GRU and the bitemporal directed cross-attention mechanism. Removing either component or replacing cross-attention with self-attention led to clear performance drops, confirming the effectiveness of the original design.</p><p>Regarding input configurations, single-time-point input resulted in the largest decline, while 3-time-point and 4-time-point settings provided only marginal gains but reduced overall accuracy, indicating that the 2-time-point design achieves the optimal balance, efficiently capturing the most relevant temporal signals without introducing the complexity or noise associated with longer historical windows. These results indicate that, under the current architecture, incorporating immediate temporal context is critical for prediction, whereas extending the input window beyond 2 time points yields diminishing or even negative returns. In addition, reversing the order of transformer and GRU also impaired performance, supporting the strategy of &#x201C;temporal encoding before interaction.&#x201D;</p><p><xref ref-type="fig" rid="figure7">Figures 7</xref> and <xref ref-type="fig" rid="figure8">8</xref> show the class-specific ROC and PR curves for the 5 multimorbidity patterns. 
All classes achieved consistently high ROC performance, with AUCs ranging from 0.9198 to 0.9426 and a macroaverage of 0.9293. The PRC analysis yielded AUC-PRC values between 0.8310 and 0.8620 (macroaverage=0.8429), indicating stable precision-recall trade-offs across classes. Overall, CLA-Net demonstrates robust and balanced predictive performance for all multimorbidity patterns.</p><p>To complement this threshold-free evaluation, <xref ref-type="table" rid="table8">Table 8</xref> reports classwise precision, recall, and <italic>F</italic><sub>1</sub>-scores, indicating consistently good predictive performance across patterns, with <italic>F</italic><sub>1</sub>-scores ranging from 0.7906 to 0.8508. As further illustrated by the confusion matrix in <xref ref-type="fig" rid="figure9">Figure 9</xref>, classification errors are primarily concentrated among clinically related multimorbidity patterns, suggesting that residual misclassifications arise from intrinsic overlap between multimorbidity profiles rather than systematic model failure.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Multiclass area under the receiver operating characteristic curve of the Cross-Lag Attention Network. AUC: area under the curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig07.png"/></fig><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Multiclass area under the precision-recall curve of the Cross-Lag Attention Network. 
AU-PRC: area under the precision-recall curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig08.png"/></fig><table-wrap id="t8" position="float"><label>Table 8.</label><caption><p>Detailed classwise performance metrics of Cross-Lag Attention Network.</p></caption><table id="table8" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Multimorbidity pattern (class)</td><td align="left" valign="bottom">Prevalence (%)</td><td align="left" valign="bottom">Precision, mean (SD)</td><td align="left" valign="bottom">Recall, mean (SD)</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score, mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top">C1: Severe Cardiometabolic-Multisystem</td><td align="left" valign="top">19.80</td><td align="char" char="plusmn" valign="top">0.8403 (0.006)</td><td align="left" valign="top">0.8314 (0.007)</td><td align="left" valign="top">0.8359 (0.006)</td></tr><tr><td align="left" valign="top">C2: Hypertension-Arthritis</td><td align="left" valign="top">22.30</td><td align="char" char="plusmn" valign="top">0.8402 (0.005)</td><td align="left" valign="top">0.8515 (0.006)</td><td align="left" valign="top">0.8458 (0.005)</td></tr><tr><td align="left" valign="top">C3: Respiratory-Musculoskeletal</td><td align="left" valign="top">15.60</td><td align="char" char="plusmn" valign="top">0.8002 (0.009)</td><td align="left" valign="top">0.7813 (0.010)</td><td align="left" valign="top">0.7906 (0.009)</td></tr><tr><td align="left" valign="top">C4: Metabolic Syndrome</td><td align="left" valign="top">24.10</td><td align="char" char="plusmn" valign="top">0.8480 (0.006)</td><td align="left" valign="top">0.8535 (0.006)</td><td align="left" valign="top">0.8508 (0.006)</td></tr><tr><td align="left" valign="top">C5: Gastritis-Arthritis</td><td align="left" valign="top">18.20</td><td align="char" char="plusmn" valign="top">0.8221 
(0.007)</td><td align="left" valign="top">0.8194 (0.008)</td><td align="left" valign="top">0.8207 (0.007)</td></tr><tr><td align="left" valign="top">Macroaverage</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table8fn1">a</xref></sup></td><td align="char" char="." valign="top">0.8303 (0.007)</td><td align="left" valign="top">0.8274 (0.008)</td><td align="left" valign="top">0.8288 (0.007)</td></tr><tr><td align="left" valign="top">Weighted average (overall)</td><td align="left" valign="top">100</td><td align="char" char="." valign="top">0.8326 (0.006)</td><td align="left" valign="top">0.8312 (0.007)</td><td align="left" valign="top">0.8319 (0.006)</td></tr></tbody></table><table-wrap-foot><fn id="table8fn1"><p><sup>a</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure9"><label>Figure 9.</label><caption><p>Confusion matrix across the 5 multimorbidity patterns.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84261_fig09.png"/></fig></sec><sec id="s3-4"><title>Sensitivity Analysis: Robustness of Class Stability</title><p>To address concerns regarding potential information leakage and the temporal robustness of latent class definitions, we conducted a comprehensive sensitivity analysis in which the LTA was reestimated using only the training-period waves (2011&#x2010;2015), thereby excluding all future observations used as prediction targets in the main analysis. This analysis was designed to evaluate whether the multimorbidity pattern structure identified in the full longitudinal dataset was inherently stable, rather than being driven by information from later waves.</p><p>We first reassessed the optimal number of latent classes by fitting LTA models with 2 to 6 classes under a free-parameter specification using the training-period data only. 
As shown in <xref ref-type="table" rid="table9">Table 9</xref>, information criteria (AIC, BIC, and SaBIC) consistently improved as the number of classes increased from 2 to 5, while a marginal deterioration in fit was observed when moving from 5 to 6 classes. The BLRT strongly supported the 5-class solution over the 4-class solution (<italic>P</italic>&#x003C;.001), whereas the improvement from 5 to 6 classes was only marginal (<italic>P</italic>=.03), suggesting diminishing returns with increased model complexity.</p><table-wrap id="t9" position="float"><label>Table 9.</label><caption><p>Model fit of latent transition analysis based on training-period waves (2011&#x2010;2015).</p></caption><table id="table9" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Models (waves used) and number of classes</td><td align="left" valign="bottom">AIC<sup><xref ref-type="table-fn" rid="table9fn1">a</xref></sup></td><td align="left" valign="bottom">BIC<sup><xref ref-type="table-fn" rid="table9fn2">b</xref></sup></td><td align="left" valign="bottom">SaBIC<sup><xref ref-type="table-fn" rid="table9fn3">c</xref></sup></td><td align="left" valign="bottom">Entropy</td><td align="left" valign="bottom">BLRT<sup><xref ref-type="table-fn" rid="table9fn4">d</xref></sup> (<italic>P</italic> value)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="7">LTA<sup><xref ref-type="table-fn" rid="table9fn5">e</xref></sup> (free parameters; 2011-2015)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">2</td><td align="char" char="." valign="top">6128.4</td><td align="char" char="." valign="top">6316.2</td><td align="char" char="." valign="top">6201.7</td><td align="char" char="." valign="top">0.72</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table9fn6">f</xref></sup></td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">3</td><td align="char" char="." 
valign="top">5486.9</td><td align="char" char="." valign="top">5759.8</td><td align="char" char="." valign="top">5594.3</td><td align="char" char="." valign="top">0.80</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">4</td><td align="char" char="." valign="top">5142.6</td><td align="char" char="." valign="top">5500.7</td><td align="char" char="." valign="top">5284.9</td><td align="char" char="." valign="top">0.84</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">5</td><td align="char" char="." valign="top">4926.8</td><td align="char" char="." valign="top">5369.9</td><td align="char" char="." valign="top">5104.2</td><td align="char" char="." valign="top">0.87</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">6</td><td align="char" char="." valign="top">4954.1</td><td align="char" char="." valign="top">5482.4</td><td align="char" char="." valign="top">5166.5</td><td align="char" char="." valign="top">0.85</td><td align="left" valign="top">.03</td></tr><tr><td align="left" valign="top" colspan="7">Selected model (2011-2015)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">5</td><td align="char" char="." valign="top">4926.8</td><td align="char" char="." valign="top">5369.9</td><td align="char" char="." valign="top">5104.2</td><td align="char" char="." valign="top">0.87</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="7">LTA (invariant)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">5</td><td align="char" char="." valign="top">4968.3</td><td align="char" char="." valign="top">5389.6</td><td align="char" char="." valign="top">5136.8</td><td align="char" char="."
valign="top">0.86</td><td align="left" valign="top">&#x2014;</td></tr></tbody></table><table-wrap-foot><fn id="table9fn1"><p><sup>a</sup>AIC: Akaike Information Criterion.</p></fn><fn id="table9fn2"><p><sup>b</sup>BIC: Bayesian Information Criterion.</p></fn><fn id="table9fn3"><p><sup>c</sup>SaBIC: sample-size adjusted Bayesian Information Criterion.</p></fn><fn id="table9fn4"><p><sup>d</sup>BLRT: Bootstrap Likelihood Ratio Test.</p></fn><fn id="table9fn5"><p><sup>e</sup>LTA: latent transition analysis.</p></fn><fn id="table9fn6"><p><sup>f</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><p>In addition, classification quality, as reflected by entropy, reached its highest value under the 5-class solution (entropy=0.87), indicating satisfactory class separation despite the reduced number of waves and observations compared with the full-sample model. Taken together, these results demonstrate that the 5-class structure remained the optimal and most parsimonious representation of multimorbidity patterns even when the analysis was restricted to the training-period data.</p><p>To ensure consistency of class interpretation across time, measurement invariance constraints were imposed on the selected 5-class model. Although imposing invariance resulted in a modest increase in information criteria and a slight reduction in entropy (from 0.87 to 0.86), the overall classification quality remained high, indicating that the core latent structure was robust to parameter constraints and not dependent on future observations.</p><p>Beyond overall model fit, we directly examined the stability of class definitions by comparing disease-specific item-response probabilities between the full-sample LTA model and the training-period-only LTA model. 
For each latent class, the top 4 representative chronic conditions were retained, and their conditional probabilities were contrasted across the 2 modeling strategies (<xref ref-type="table" rid="table10">Table 10</xref>).</p><table-wrap id="t10" position="float"><label>Table 10.</label><caption><p>Concordance of class definitions between full-sample latent transition analysis and training-only latent transition analysis.</p></caption><table id="table10" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Multimorbidity pattern (class) and<break/>top 4 representative conditions<sup><xref ref-type="table-fn" rid="table10fn1">a</xref></sup></td><td align="left" valign="bottom">Probability<break/>(full model)<sup><xref ref-type="table-fn" rid="table10fn2">b</xref></sup></td><td align="left" valign="bottom">Probability (training period only)<sup><xref ref-type="table-fn" rid="table10fn3">c</xref></sup></td><td align="left" valign="bottom">Difference (bias)</td><td align="left" valign="bottom">Pearson correlation coefficient (<italic>r</italic>)<sup><xref ref-type="table-fn" rid="table10fn4">d</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Class 1: Cardiometabolic-Multisystem</td><td align="char" char="." valign="top">0.954</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Arthritis</td><td align="char" char="." valign="top">0.904</td><td align="char" char="." valign="top">0.774</td><td align="char" char="." valign="top">&#x2212;0.130</td><td align="char" char="." valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hypertension</td><td align="char" char="." valign="top">0.827</td><td align="char" char="." valign="top">0.854</td><td align="char" char="." 
valign="top">0.027</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heart disease</td><td align="char" char="." valign="top">0.781</td><td align="char" char="." valign="top">0.781</td><td align="char" char="." valign="top">0.000</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dyslipidemia</td><td align="char" char="." valign="top">0.693</td><td align="char" char="." valign="top">0.662</td><td align="char" char="." valign="top">&#x2212;0.031</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">Class 2: Hypertension-Arthritis</td><td align="char" char="." valign="top">0.983</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hypertension</td><td align="char" char="." valign="top">0.997</td><td align="char" char="." valign="top">0.917</td><td align="char" char="." valign="top">&#x2212;0.080</td><td align="char" char="." valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Arthritis</td><td align="char" char="." valign="top">0.996</td><td align="char" char="." valign="top">0.958</td><td align="char" char="." valign="top">&#x2212;0.038</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gastric disease</td><td align="char" char="." valign="top">0.374</td><td align="char" char="." valign="top">0.446</td><td align="char" char="." valign="top">0.072</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dyslipidemia</td><td align="char" char="." 
valign="top">0.273</td><td align="char" char="." valign="top">0.331</td><td align="char" char="." valign="top">0.058</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">Class 3: Respiratory-Musculoskeletal</td><td align="char" char="." valign="top">0.970</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lung disease</td><td align="char" char="." valign="top">0.908</td><td align="char" char="." valign="top">0.783</td><td align="char" char="." valign="top">&#x2212;0.125</td><td align="char" char="." valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Arthritis</td><td align="char" char="." valign="top">0.587</td><td align="char" char="." valign="top">0.604</td><td align="char" char="." valign="top">0.017</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Asthma</td><td align="char" char="." valign="top">0.504</td><td align="char" char="." valign="top">0.540</td><td align="char" char="." valign="top">0.036</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gastric disease</td><td align="char" char="." valign="top">0.434</td><td align="char" char="." valign="top">0.466</td><td align="char" char="." valign="top">0.032</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">Class 4: Metabolic Syndrome</td><td align="char" char="." valign="top">0.973</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hypertension</td><td align="char" char="." valign="top">0.883</td><td align="char" char="." valign="top">0.841</td><td align="char" char="." 
valign="top">&#x2212;0.042</td><td align="char" char="." valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dyslipidemia</td><td align="char" char="." valign="top">0.545</td><td align="char" char="." valign="top">0.646</td><td align="char" char="." valign="top">0.101</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heart disease</td><td align="char" char="." valign="top">0.444</td><td align="char" char="." valign="top">0.346</td><td align="char" char="." valign="top">&#x2212;0.098</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diabetes</td><td align="char" char="." valign="top">0.363</td><td align="char" char="." valign="top">0.308</td><td align="char" char="." valign="top">&#x2212;0.055</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">Class 5: Gastritis-Arthritis</td><td align="char" char="." valign="top">0.965</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Arthritis</td><td align="char" char="." valign="top">0.852</td><td align="char" char="." valign="top">0.892</td><td align="char" char="." valign="top">0.040</td><td align="char" char="." valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gastric disease</td><td align="char" char="." valign="top">0.764</td><td align="char" char="." valign="top">0.818</td><td align="char" char="." valign="top">0.054</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heart disease</td><td align="char" char="." 
valign="top">0.242</td><td align="char" char="." valign="top">0.302</td><td align="char" char="." valign="top">0.060</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kidney disease</td><td align="char" char="." valign="top">0.194</td><td align="char" char="." valign="top">0.253</td><td align="char" char="." valign="top">0.059</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">Overall stability</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Average across all classes</td><td align="char" char="." valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table10fn7">g</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="char" char="." valign="top">0.048<sup><xref ref-type="table-fn" rid="table10fn5">e</xref></sup></td><td align="char" char="." valign="top">0.969<sup><xref ref-type="table-fn" rid="table10fn6">f</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table10fn1"><p><sup>a</sup>To ensure consistency, the top 4 conditions with the highest prevalence in the full model were selected for comparison for each pattern.</p></fn><fn id="table10fn2"><p><sup>b</sup>Derived from the complete dataset, including Waves 1 through 5 (2011&#x2010;2020).</p></fn><fn id="table10fn3"><p><sup>c</sup>Derived exclusively from the training dataset (Waves 1 through 3, 2011&#x2010;2015), blinding the model to future test data.</p></fn><fn id="table10fn4"><p><sup>d</sup>Pearson correlation coefficient calculated based on the probability vectors of all 14 chronic conditions for the corresponding class.</p></fn><fn id="table10fn5"><p><sup>e</sup>Mean absolute deviation value.</p></fn><fn id="table10fn6"><p><sup>f</sup>Mean value.</p></fn><fn id="table10fn7"><p><sup>g</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><p>Across 
all 5 classes, the composition and relative ordering of dominant conditions remained highly consistent. The cardiometabolic-multisystem class continued to be characterized by a high prevalence of arthritis, hypertension, heart disease, and dyslipidemia; the hypertension-arthritis class preserved its defining dual dominance of hypertension and arthritis; the respiratory-musculoskeletal class remained anchored by lung disease and asthma; the metabolic syndrome class retained its core metabolic features; and the gastritis-arthritis class continued to reflect co-occurring gastrointestinal and musculoskeletal conditions. Importantly, no class exhibited a change in its defining disease constellation when future waves were excluded from model estimation.</p><p>Quantitatively, the similarity between item-response probability profiles was high across all classes, with Pearson correlation coefficients ranging from 0.954 to 0.983 and a mean correlation of 0.969. These correlations were computed using the full set of disease-specific probabilities, indicating strong structural concordance rather than superficial agreement limited to a subset of conditions [<xref ref-type="bibr" rid="ref53">53</xref>]. Differences in absolute probabilities were generally modest, with a mean absolute deviation of 0.048 across classes, reflecting only minor shifts in disease prevalence rather than substantive changes in class meaning.</p><p>Taken together, these findings provide strong evidence that the multimorbidity class structure identified in the main analysis is stable and not driven by information from future observations. Even when latent classes were defined exclusively using training-period data, both the optimal number of classes and their clinical interpretations remained highly consistent with those derived from the full-sample model. 
The observed differences were limited to modest shifts in absolute disease probabilities, while the underlying pattern structure and relative disease importance were preserved.</p><p>We acknowledge that, under a strictly unbiased evaluation framework, latent class definitions could alternatively be derived exclusively from training-period data and then applied to the test set, for example, by projecting test observations onto a training-derived latent space. In this study, however, we adopted a population-level LTA fitted on all available waves to define stable multimorbidity patterns, which were subsequently treated as fixed targets for prediction. Importantly, as demonstrated in <xref ref-type="table" rid="table10">Table 10</xref>, the latent class structures derived from training-period data and from the full dataset exhibited a high degree of concordance (mean <italic>r</italic>=0.969), indicating that the class definitions were not driven by any single wave or subset of the data. Therefore, the use of the full-sample LTA model to generate predictive targets does not introduce meaningful bias into the evaluation. The &#x201C;ground truth&#x201D; labels for the test set would remain virtually unchanged even if they were generated by projecting test data onto a latent space fitted strictly to training parameters. The reported predictive performance of CLA-Net reflects a genuine ability to forecast future multimorbidity states rather than an artifact of information leakage in the target generation process.</p><p>To explicitly assess whether the use of full-sample LTA targets introduces any optimistic bias in predictive evaluation, we conducted an additional experiment in which the LTA model was estimated exclusively on training-period data, with all model parameters fixed thereafter. 
The downstream prediction pipeline was then fully repeated using these training-only LTA-derived targets, and the CLA-Net model was retrained and reevaluated accordingly.</p><p>As summarized in <xref ref-type="table" rid="table11">Table 11</xref>, the predictive performance obtained under training-only LTA targets remains highly consistent with the original results based on full-sample LTA targets. Specifically, the differences in accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score are all below 0.3%, and all metrics remain well within 1 SD of the original estimates. The slight performance reductions observed are expected and can be attributed to minor numerical variations in latent class assignment, rather than to any systematic bias arising from information leakage.</p><table-wrap id="t11" position="float"><label>Table 11.</label><caption><p>Comparison of predictive performance under full-sample and training-only LTA<sup><xref ref-type="table-fn" rid="table11fn1">a</xref></sup> targets.</p></caption><table id="table11" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Metric</td><td align="left" valign="bottom">Full-sample LTA targets, mean (SD)</td><td align="left" valign="bottom">Training-only LTA targets, mean (SD)</td><td align="left" valign="bottom">Difference</td></tr></thead><tbody><tr><td align="left" valign="top">Accuracy</td><td align="char" char="plusmn" valign="top">0.8352 (0.0048)</td><td align="char" char="plusmn" valign="top">0.8338 (0.0050)</td><td align="char" char="." valign="top">&#x2212;0.0014 (&#x2212;0.17%)</td></tr><tr><td align="left" valign="top">Precision</td><td align="char" char="plusmn" valign="top">0.8326 (0.0053)</td><td align="char" char="plusmn" valign="top">0.8315 (0.0055)</td><td align="char" char="." 
valign="top">&#x2212;0.0011 (&#x2212;0.13%)</td></tr><tr><td align="left" valign="top">Recall</td><td align="char" char="plusmn" valign="top">0.8312 (0.0056)</td><td align="char" char="plusmn" valign="top">0.8290 (0.0060)</td><td align="char" char="." valign="top">&#x2212;0.0022 (&#x2212;0.26%)</td></tr><tr><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td><td align="char" char="plusmn" valign="top">0.8319 (0.0051)</td><td align="char" char="plusmn" valign="top">0.8302 (0.0054)</td><td align="char" char="." valign="top">&#x2212;0.0017 (&#x2212;0.20%)</td></tr></tbody></table><table-wrap-foot><fn id="table11fn1"><p><sup>a</sup>LTA: latent transition analysis.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-5"><title>Transition-Focused Evaluation of Multimorbidity Class Prediction</title><p>To disentangle the prediction of genuine multimorbidity progression from trivial class persistence, we conducted a stratified evaluation by separating individuals into those whose multimorbidity class remained unchanged between consecutive waves (stayer-only) and those who experienced a true class transition (transition-only). Performance was assessed using macroaveraged precision, recall, <italic>F</italic><sub>1</sub>-score, and overall accuracy for each subset.</p>
<p>As shown in <xref ref-type="table" rid="table12">Table 12</xref>, the model retained substantial discriminative ability on the transition-only subset, where individuals experienced a true change in multimorbidity class between <inline-formula><mml:math id="ieqn47"><mml:mi>t</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn48"><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>. On this subset, macroaveraged precision, recall, and <italic>F</italic><sub>1</sub>-score reached 0.7615 (SD 0.0106), 0.7632 (SD 0.0112), and 0.7623 (SD 0.0102), respectively, with an overall accuracy of 0.7654 (SD 0.0096). Although performance on the transition-only subset was lower than that observed in the full-sample and stayer-only evaluations, this reduction is expected given the increased complexity of predicting off-diagonal class transitions. 
Because multimorbidity patterns tend to remain relatively stable across adjacent waves in populations with largely irreversible chronic conditions, the transition-only subset represents a smaller and more heterogeneous group, which inherently increases prediction difficulty and performance variability, as reflected by the larger SDs observed.</p><table-wrap id="t12" position="float"><label>Table 12.</label><caption><p>Performance evaluation on different dynamic subsets.</p></caption><table id="table12" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Evaluation subset</td><td align="left" valign="bottom">Precision, mean (SD)</td><td align="left" valign="bottom">Recall, mean (SD)</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score, mean (SD)</td><td align="left" valign="bottom">Accuracy, mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top">Full sample</td><td align="char" char="plusmn" valign="top">0.8326 (0.0053)</td><td align="char" char="plusmn" valign="top">0.8312 (0.0056)</td><td align="char" char="plusmn" valign="top">0.8319 (0.0051)</td><td align="char" char="plusmn" valign="top">0.8352 (0.0048)</td></tr><tr><td align="left" valign="top">Transition-only (<italic>Y<sub>t</sub></italic> &#x2260; <italic>Y<sub>t</sub></italic><sub>+1</sub>; 23.47%)</td><td align="char" char="plusmn" valign="top">0.7615 (0.0106)</td><td align="char" char="plusmn" valign="top">0.7632 (0.0112)</td><td align="char" char="plusmn" valign="top">0.7623 (0.0102)</td><td align="char" char="plusmn" valign="top">0.7654 (0.0096)</td></tr><tr><td align="left" valign="top">Stayer-only (<italic>Y</italic><sub><italic>t</italic></sub> = <italic>Y<sub>t</sub></italic><sub>+1</sub>; 76.53%)</td><td align="char" char="plusmn" valign="top">0.8544 (0.0049)</td><td align="char" char="plusmn" valign="top">0.8521 (0.0052)</td><td align="char" char="plusmn" valign="top">0.8533 (0.0047)</td><td align="char" char="plusmn" valign="top">0.8566 
(0.0044)</td></tr></tbody></table></table-wrap><p>As an important clinical benchmark, we further evaluated a naive persistence model that simply predicts class(<italic>t+1</italic>)=class(<italic>t</italic>). On the test set, the naive model obtained an overall accuracy of 0.7653, substantially below CLA-Net (0.8352, SD 0.0048). Notably, on the transition-only subset, the naive model achieves 0% accuracy by definition, whereas CLA-Net maintains 0.7654 accuracy, demonstrating its added value in identifying genuine transitions rather than merely reproducing stable states. Together, these results indicate that the overall performance gain is not driven by trivial persistence, but by the model&#x2019;s capacity to capture progression-related signals associated with changes in multimorbidity structure.</p><p>To further account for chance agreement and the high prevalence of class persistence, we additionally report chance-adjusted performance metrics. Across the full sample, CLA-Net achieved a Cohen &#x03BA; of 0.7935 (SD 0.0055) and a Matthews correlation coefficient of 0.7968 (SD 0.0058), reflecting substantial chance-adjusted concordance between predicted and observed class labels. These values are consistent with the maintained performance on the transition-only subset, confirming that CLA-Net possesses robust discriminative power to identify genuine multimorbidity evolution rather than merely replicating static baselines.</p></sec><sec id="s3-6"><title>Sensitivity Analysis of Data Imputation Methods</title><p>To strictly verify the validity and robustness of the imputation strategy used in this study (ie, MiceForest), we conducted a comparative experiment to assess how different missing data handling methods affect the final predictive performance of CLA-Net. 
In this experiment, the standard MiceForest algorithm was replaced by 4 alternative strategies: complete case analysis (CCA; directly discarding samples with missing values), random imputation (filling missing values with randomly selected observed values), mean imputation (filling with the variable mean), and KNN imputation. The comparative results are presented in <xref ref-type="table" rid="table13">Table 13</xref>.</p><table-wrap id="t13" position="float"><label>Table 13.</label><caption><p>Performance comparison of Cross-Lag Attention Network using different missing data handling strategies.<sup><xref ref-type="table-fn" rid="table13fn1">a</xref></sup></p></caption><table id="table13" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Imputation strategy</td><td align="left" valign="bottom">Precision, mean (SD)</td><td align="left" valign="bottom">Recall, mean (SD)</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score, mean (SD)</td><td align="left" valign="bottom">Accuracy, mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top">Complete case analysis</td><td align="char" char="plusmn" valign="top">0.8015 (0.0072)</td><td align="char" char="plusmn" valign="top">0.7980 (0.0078)</td><td align="char" char="plusmn" valign="top">0.7995 (0.0075)</td><td align="char" char="plusmn" valign="top">0.8050 (0.0070)</td></tr><tr><td align="left" valign="top">Random imputation</td><td align="char" char="plusmn" valign="top">0.8080 (0.0085)</td><td align="char" char="plusmn" valign="top">0.8050 (0.0090)</td><td align="char" char="plusmn" valign="top">0.8065 (0.0088)</td><td align="char" char="plusmn" valign="top">0.8110 (0.0082)</td></tr><tr><td align="left" valign="top">Mean imputation</td><td align="char" char="plusmn" valign="top">0.8150 (0.0065)</td><td align="char" char="plusmn" valign="top">0.8120 (0.0069)</td><td align="char" char="plusmn" valign="top">0.8135 (0.0066)</td><td align="char" char="plusmn" 
valign="top">0.8180 (0.0062)</td></tr><tr><td align="left" valign="top">KNN<sup><xref ref-type="table-fn" rid="table13fn2">b</xref></sup> imputation</td><td align="char" char="plusmn" valign="top">0.8265 (0.0058)</td><td align="char" char="plusmn" valign="top">0.8240 (0.0062)</td><td align="char" char="plusmn" valign="top">0.8252 (0.0060)</td><td align="char" char="plusmn" valign="top">0.8290 (0.0055)</td></tr><tr><td align="left" valign="top">MiceForest (ours)</td><td align="char" char="plusmn" valign="top"><italic>0.8326 (0.0053)</italic></td><td align="char" char="plusmn" valign="top"><italic>0.8312 (0.0056)</italic></td><td align="char" char="plusmn" valign="top"><italic>0.8319 (0.0051)</italic></td><td align="char" char="plusmn" valign="top"><italic>0.8352 (0.0048)</italic></td></tr></tbody></table><table-wrap-foot><fn id="table13fn1"><p><sup>a</sup>The italicized values represent the best performance among the data handling strategies for each evaluation metric. These italicized values are used to highlight the most outstanding results among the different strategies.</p></fn><fn id="table13fn2"><p><sup>b</sup>KNN: k-nearest neighbors.</p></fn></table-wrap-foot></table-wrap><p>As shown in <xref ref-type="table" rid="table13">Table 13</xref>, the model using MiceForest for data imputation achieved the best performance across all metrics (accuracy=0.8352, <italic>F</italic><sub>1</sub>-score=0.8319). In contrast, CCA resulted in the lowest performance (accuracy=0.8050). It is important to note that the CCA metrics were evaluated on a reduced test set (subset of individuals with complete data only), whereas imputation methods were evaluated on the full test set. Despite being tested on this potentially &#x201C;cleaner&#x201D; subset, CCA still underperformed. This confirms that the substantial reduction in training sample size caused by discarding data severely compromises the model&#x2019;s ability to learn robust patterns, thereby limiting its generalizability. 
Random imputation also performed poorly (accuracy=0.8110), primarily because randomly assigned values introduce significant noise and disrupt the true correlation structure between variables. While mean imputation (accuracy=0.8180) and KNN imputation (accuracy=0.8290) showed improvements over random methods, they still lagged behind MiceForest.</p><p>These results indicate that MiceForest, which leverages iterative random forests to model nonlinear interactions, preserves the underlying data structure more effectively than stochastic (random) or distance-based (KNN) methods, confirming it as the optimal choice for our framework.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>This study proposes an innovative research framework that integrates population-level multimorbidity pattern recognition with individual-level future prediction, achieving a key transition in multimorbidity research from descriptive statistics to prospective prediction.</p><p>In terms of multimorbidity pattern recognition, this study identified 5 clinically meaningful classes using LTA: Cardiometabolic-Multisystem, Hypertension-Arthritis, Respiratory-Musculoskeletal, Metabolic Syndrome, and Gastritis-Arthritis. Patients in the Cardiometabolic-Multisystem pattern bore the heaviest overall disease burden, with widespread hypertension, heart disease, dyslipidemia, diabetes, gastritis, arthritis, lung disease, and kidney disease, while the coexistence of stroke further indicated advanced vascular damage. The interactions and cascading effects among these chronic conditions have been confirmed in multiple studies. For instance, cardiometabolic diseases such as hypertension, diabetes, dyslipidemia, and heart disease frequently co-occur and significantly increase the risk of cardiovascular events and mortality [<xref ref-type="bibr" rid="ref50">50</xref>]. 
Inflammatory arthritis (eg, rheumatoid arthritis) has also been associated with elevated cardiovascular risk [<xref ref-type="bibr" rid="ref54">54</xref>]. This multisystem involvement highlights the importance of comprehensive and integrated management for such patients. The Hypertension-Arthritis pattern was characterized by the strong co-occurrence of hypertension and arthritis. This pattern is well supported by prior studies showing that hypertension is a common comorbidity of osteoarthritis, potentially linked through shared mechanisms such as inflammation, oxidative stress, and vascular dysfunction, with arthritis treatments possibly influencing blood pressure [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. The Respiratory-Musculoskeletal pattern was defined by a high prevalence of lung disease, arthritis, and asthma, often accompanied by gastritis and hypertension. This finding is consistent with previous clinical evidence [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. The Metabolic Syndrome class exhibited high rates of hypertension, dyslipidemia, heart disease, and diabetes, without significant multisystem involvement. This aligns with the classical definition of metabolic syndrome and its strong association with increased risks of cardiovascular disease and type 2 diabetes [<xref ref-type="bibr" rid="ref57">57</xref>]. Finally, the Gastritis-Arthritis pattern showed the lowest overall disease burden, primarily involving arthritis and gastritis. 
This finding is supported by prior medical studies reporting comorbidities such as enteropathic arthritis in patients with inflammatory bowel disease or gastrointestinal complications related to arthritis medications [<xref ref-type="bibr" rid="ref58">58</xref>].</p><p>In multimorbidity pattern prediction, this study introduces CLA-Net, a novel deep learning framework specifically designed to forecast multimorbidity patterns from longitudinal health data with short temporal lags. Our comprehensive evaluation demonstrated that CLA-Net consistently outperformed all baseline methods, achieving an accuracy of 0.8352 and an AUC of 0.9293. Several strong deep learning baselines included in the comparison, such as Mamba, PatchTST, and iTransformer, were originally developed for long-sequence time series forecasting and are optimized to exploit long-range temporal dependencies through mechanisms such as selective state-space modeling or patch-based representations. In the present task, the input sequence consists of 2 adjacent time points <italic>t&#x2212;1</italic> and <italic>t</italic>, which may limit the extent to which these architectures can fully leverage their design strengths. In contrast, CLA-Net explicitly focuses on modeling short-lag transitions through a dual-branch architecture and a bitemporal directed cross-attention mechanism, enabling more effective capture of immediate disease-state dependencies. Compared with the best-performing baseline Mamba model, CLA-Net improved accuracy by 1.10% (0.8352 vs 0.8242) and by 1.05% over the hybrid LSTM+transformer architecture (0.8352 vs 0.8247). 
More notably, CLA-Net increased AUC by 1.87% relative to LSTM+transformer (0.9293 vs 0.9106), indicating superior discriminative ability across decision thresholds, which is particularly important for clinical applications requiring flexible risk stratification strategies.</p><p>Our ablation experiments revealed the critical contributions of individual components and validated the soundness of the architectural design. Retaining only the transformer branch reduced accuracy to 0.8166 (&#x2212;1.86%), highlighting its essential role in capturing global cross-temporal dependencies. Retaining only the GRU branch further decreased accuracy to 0.8102 (&#x2212;2.50%), indicating that while GRU effectively models temporal dynamics, the absence of global feature interactions severely limits its ability to capture complex multimorbidity patterns. The performance gap between the 2 branches (0.8166 vs 0.8102) also suggests that global interactions are slightly more important than local temporal dependencies, yet their integration is necessary to achieve optimal performance. Replacing the bitemporal directed cross-attention with standard self-attention reduced accuracy to 0.8244 (&#x2212;0.93%), with precision and <italic>F</italic><sub>1</sub>-scores declining by 1.08% and 1.12%, respectively. This underscores the importance of the directed mechanism in maintaining predictive stability and balance. 
Unlike standard self-attention, which may cause &#x201C;future information leakage,&#x201D; the dual-window design enforces a &#x201C;history-to-current&#x201D; information flow that better aligns with the clinical logic of predicting future risk based on historical data.</p><p>In terms of input configuration, single-time-point input resulted in a sharp drop in accuracy to 0.7985 (&#x2212;3.67%) and recall to 0.7964 (&#x2212;3.48%), indicating that the absence of historical information causes the model to miss many true multimorbidity cases, likely because early signals of certain patterns can only be detected through temporal evolution. Interestingly, adding more historical time points (3 or 4) did not improve overall performance: with 3 time points, precision slightly increased (0.8355 vs 0.8334) but accuracy dropped to 0.8228 and recall to 0.8185; with 4 time points, recall reached its highest (0.8360), but accuracy (0.8268) and <italic>F</italic><sub>1</sub>-score (0.8262) fell below the 2-time-point configuration. This empirical finding suggests that for this specific multimorbidity prediction task, immediate history is significantly more predictive than long-term history. Including more distant waves (eg, from 6&#x2010;8 years prior) appears to introduce noise or weaker relevance that obscures the strong signals from the most recent health state transition. Therefore, the 2-time-point configuration effectively captures the critical &#x201C;immediate&#x201D; evolution while minimizing interference from outdated information, thus striking the best balance between information richness and model complexity. Importantly, this temporal design also has clear implications for practical clinical use. 
By leveraging a &#x201C;past&#x2013;current&#x201D; window spanning approximately 2&#x2010;3 years to predict multimorbidity patterns 2&#x2010;3 years into the future, CLA-Net is not intended for short-term clinical decision-making (eg, acute care), but rather for midterm risk stratification and early intervention planning. In real-world settings, such predictions could be used to identify individuals who are likely to transition into more complex or higher-burden multimorbidity patterns within the next few years, thereby providing a clinically meaningful window for intervention. Finally, reversing the order of GRU and transformer (GRU-transformer) lowered accuracy to 0.8235 (&#x2212;1.17%), confirming the effectiveness of the &#x201C;temporal encoding before interaction&#x201D; design. GRU provides temporally aware representations that serve as more suitable inputs for subsequent attention mechanisms, enabling more effective cross-temporal interactions, whereas applying attention without temporal context may lead to suboptimal feature modeling.</p><p>Considering the chronic and partially irreversible nature of many included conditions, class persistence is expected to be high. Consistent with this, approximately 76.53% of test instances were stayers. While the naive model achieves high accuracy on stable patients, it has zero clinical utility for risk warning, as it fails to identify any patient whose health state is deteriorating or changing (0% accuracy on the transition subset). In contrast, CLA-Net successfully identifies 76.54% of the patients who undergo pattern transitions. From a clinical perspective, the primary value of a predictive model lies in its ability to provide early warnings for high-risk transitions rather than merely confirming stability. 
Therefore, the complexity of the deep learning approach is justified by its ability to capture these critical, nonlinear disease progressions that a simple persistence rule completely misses.</p><p>Regarding generalizability, the proposed framework is designed to be transferable to other longitudinal electronic health record (EHR) datasets. The LTA component relies on routinely collected chronic disease diagnoses, which are widely available across EHR systems, allowing population-level pattern structures to be reestimated or adapted to different health care contexts. Meanwhile, the CLA-Net architecture operates on generic temporal feature representations and does not depend on dataset-specific coding schemes. This modular design facilitates scalability to larger cohorts and longer observation periods and enables potential transfer learning scenarios in which a pretrained representation model can be fine-tuned using locally derived multimorbidity structures. These properties suggest that the framework has the potential for broader application beyond the present dataset. However, due to the inherent survivorship bias of longitudinal follow-up cohorts, the current model is primarily applicable to chronic disease management in surviving, community-dwelling populations. Importantly, it should not be applied to high-acuity clinical settings (eg, emergency or intensive care unit populations) where mortality risk is a competing outcome and may dominate near-term trajectories. In such contexts, mortality-aware endpoints and competing-risk modeling would be required prior to any clinical deployment.</p></sec><sec id="s4-2"><title>Practical Implications</title><p>The ability of CLA-Net to anticipate future multimorbidity pattern membership offers important implications for the longitudinal management of patients with established chronic conditions. 
Rather than responding to isolated disease events, clinicians can leverage pattern-based forecasts to organize care around expected configurations of coexisting conditions, thereby supporting a more coordinated and forward-looking approach to chronic disease management.</p><p>First, knowledge of an individual&#x2019;s likely future multimorbidity pattern can inform proactive care planning for patients with multimorbidity. For example, individuals predicted to enter the metabolic syndrome pattern may benefit from earlier emphasis on lifestyle modification, metabolic regulation, and adherence support, with the aim of slowing progression toward more complex cardiometabolic profiles. Similarly, those expected to belong to the cardiometabolic-multisystem pattern can be prioritized for closer surveillance and integrated management across cardiovascular, metabolic, and musculoskeletal domains, helping to mitigate cumulative disease burden over time.</p><p>Second, pattern-level prediction provides a practical framework for structuring multidisciplinary collaboration. Instead of relying on uniform or reactive referral pathways, health care providers may tailor multidisciplinary team composition according to the anticipated constellation of conditions. For instance, the respiratory-musculoskeletal pattern underscores the value of coordinated input from pulmonology, rheumatology, and rehabilitation services to jointly address respiratory impairment and functional limitation. Likewise, the gastritis-arthritis pattern highlights the need for alignment between gastroenterology and pain management to balance long-term anti-inflammatory therapy with gastrointestinal protection.</p><p>At the organizational level, pattern-based predictions can facilitate more efficient allocation of follow-up intensity, monitoring priorities, and supportive services for patients with multimorbidity within long-term chronic care systems. 
Consistent with its design scope, the proposed framework does not seek to model acute deterioration, end-stage progression, or mortality-related outcomes. Rather, it functions as a scalable decision-support layer to identify patients who may benefit from anticipatory, pattern-oriented management strategies in settings focused on sustained multimorbidity care.</p></sec><sec id="s4-3"><title>Limitations and Future Direction</title><p>Although this study has made significant progress in multimorbidity pattern prediction, several areas remain for improvement. First, although CLA-Net demonstrated strong predictive performance on a longitudinal cohort, the current evaluation was conducted using a single data source. Differences in population characteristics, disease coding systems, and follow-up structures across EHR datasets may affect model behavior and warrant further investigation. Future work will focus on validating the framework across diverse EHR datasets, exploring transfer learning strategies, and extending the model to other chronic disease domains and health care systems.</p><p>Second, this study does not explicitly incorporate medication-related information into the modeling framework. Chronic conditions were therefore represented as binary indicators of presence or absence, without distinguishing between actively treated and untreated disease states. This abstraction is appropriate for predicting future multimorbidity pattern membership at the population level, but it may obscure clinically relevant differences in disease control achieved through pharmacological management. Consequently, the predicted pattern assignments should be interpreted as reflecting disease co-occurrence status rather than treatment-adjusted clinical responses. 
Future work integrating longitudinal medication data may further enhance the clinical granularity of multimorbidity pattern prediction.</p><p>Third, the model&#x2019;s predictive performance reflects a combined but functionally distinct reliance on objective disease history and subjective self-assessment measures, such as self-rated health and self-rated memory. While objective disease variables anchor predictions to diagnosed multimorbidity status, subjective assessments provide higher-level summaries of perceived health and functional burden and may contribute more strongly for individuals with borderline or heterogeneous disease profiles. Although the proposed framework demonstrates strong overall predictive performance, the relative contributions of subjective versus objective inputs cannot be fully disentangled within the current framework without dedicated interpretability analyses, such as feature attribution or attention visualization. Addressing this limitation and providing greater transparency regarding the sources of predictive power will be a key focus of future work.</p><p>Fourth, although the proposed framework demonstrates strong predictive performance, incorporating detailed interpretability analyses, such as SHAP (Shapley Additive Explanations) values or attention weight visualizations, may further enhance its practical utility. Owing to the complexity of the model architecture, such analyses are beyond the scope of this study. Future research will focus on developing tailored interpretability strategies to better elucidate the learned multimorbidity representations and support clinical interpretation.</p><p>Fifth, regarding temporal granularity, the survey waves in the CHARLS dataset are separated by nonuniform intervals, ranging from approximately 2 to 3 years. In the current CLA-Net architecture, transitions between adjacent waves are modeled as equidistant steps, without explicitly encoding the varying time intervals. 
This simplification may introduce temporal noise into the estimation of transition dynamics, particularly when interpreting changes across waves with heterogeneous follow-up durations. Future work could address this limitation by incorporating time-aware representations or modeling strategies explicitly designed for irregular temporal intervals.</p><p>Another methodological consideration concerns the transformation of probabilistic latent class assignments into deterministic labels for supervised prediction. In this study, posterior class probabilities obtained from the LTA were converted into hard labels using a maximum posterior probability assignment strategy. While this approach provides a clear and stable supervisory signal for training the prediction model, it inevitably discards information regarding classification uncertainty. In particular, individuals with ambiguous or borderline posterior distributions are treated as belonging entirely to a single class, which may encourage overconfident predictions. Future work could address this limitation by leveraging soft targets, such as full posterior probability vectors, within the learning objective. Incorporating probabilistic supervision through distribution-based loss functions or uncertainty-aware architectures may enable a more nuanced representation of multimorbidity structure and further improve predictive robustness.</p><p>Finally, to ensure the integrity of longitudinal trajectory modeling and the stability of LTA patterns, this study included participants present in all 5 waves (2011&#x2010;2020). We acknowledge that excluding individuals who died or dropped out due to severe illness may introduce survivorship bias, potentially skewing the dataset toward a &#x201C;healthier survivor&#x201D; cohort. However, this inclusion criterion was a necessary methodological trade-off to accurately capture the continuous evolution of multimorbidity patterns. 
It is worth noting that the primary goal of this framework is to support chronic disease management and secondary prevention in the general population, rather than predicting acute mortality risk in end-stage patients. To address this limitation, future research could incorporate advanced strategies such as joint longitudinal-survival modeling, inverse probability-of-censoring weighting, or competing-risk frameworks to explicitly account for nonrandom attrition and mortality, thereby extending the model&#x2019;s generalizability to high-risk clinical populations.</p></sec><sec id="s4-4"><title>Conclusion</title><p>This study addresses the complex challenges of multimorbidity management in the context of population aging by proposing an innovative framework that bridges population-level pattern recognition and individual risk prediction. Using LTA, we identified multimorbidity patterns with temporal consistency and clinical stability (ie, Cardiometabolic-Multisystem, Hypertension-Arthritis, Respiratory-Musculoskeletal, Metabolic Syndrome, and Gastritis-Arthritis), which were then transformed into predictive labels to develop the CLA-Net deep learning model. CLA-Net integrates the sequential modeling capacity of GRU with the feature interaction advantages of a bitemporal directed cross-attention mechanism, achieving superior performance in capturing both temporal dependencies and complex feature interactions in chronic disease progression. Experimental results demonstrated that CLA-Net significantly outperformed existing methods across accuracy, recall, precision, <italic>F</italic><sub>1</sub>-score, and AUC.</p></sec></sec></body><back><ack><p>This work is supported by the Beijing Logistics Informatics Research Base. 
We used ChatGPT-5 for language polishing.</p></ack><notes><sec><title>Funding</title><p>This work was supported by the National Social Science Foundation of China (grant number 18ZDA086), the National Natural Science Foundation of China (grant number 62173025), the Fundamental Research Funds for the Central Universities (grant number 2025-JYB-XJSJJ-022), and Beijing Logistics Informatics Research Base.</p></sec><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: QZ, WM</p><p>Data curation: QZ, WM</p><p>Formal analysis: QZ</p><p>Funding acquisition: RZ</p><p>Investigation: QZ</p><p>Methodology: QZ, WM, BZ</p><p>Project administration: RZ, WM, XZ</p><p>Resources: RZ, WM</p><p>Software: QZ</p><p>Validation: QZ, WM, BZ</p><p>Writing &#x2013; original draft: QZ</p><p>Writing &#x2013; review &#x0026; editing: QZ, RZ, WM, BZ, XZ</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AIC</term><def><p>Akaike Information Criterion</p></def></def-item><def-item><term id="abb2">AU-PRC</term><def><p>area under the precision-recall curve</p></def></def-item><def-item><term id="abb3">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb4">BIC</term><def><p>Bayesian Information Criterion</p></def></def-item><def-item><term id="abb5">BLRT</term><def><p>Bootstrap Likelihood Ratio Test</p></def></def-item><def-item><term id="abb6">CCA</term><def><p>complete case analysis</p></def></def-item><def-item><term id="abb7">CHARLS</term><def><p>China Health and Retirement Longitudinal Study</p></def></def-item><def-item><term id="abb8">CLA-Net</term><def><p>Cross-Lag Attention Network</p></def></def-item><def-item><term id="abb9">CNN</term><def><p>convolutional neural 
network</p></def></def-item><def-item><term id="abb10">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb11">FFN</term><def><p>feed-forward network</p></def></def-item><def-item><term id="abb12">FPR</term><def><p>false positive rate</p></def></def-item><def-item><term id="abb13">GRU</term><def><p>Gated Recurrent Unit</p></def></def-item><def-item><term id="abb14">KNN</term><def><p>k-nearest neighbors</p></def></def-item><def-item><term id="abb15">LCA</term><def><p>latent class analysis</p></def></def-item><def-item><term id="abb16">LSTM</term><def><p>long short-term memory</p></def></def-item><def-item><term id="abb17">LTA</term><def><p>latent transition analysis</p></def></def-item><def-item><term id="abb18">PR</term><def><p>precision-recall</p></def></def-item><def-item><term id="abb19">ROC</term><def><p>receiver operating characteristic</p></def></def-item><def-item><term id="abb20">SaBIc</term><def><p>sample-size adjusted Bayesian Information Criterion</p></def></def-item><def-item><term id="abb21">SHAP</term><def><p>Shapley Additive Explanations</p></def></def-item><def-item><term id="abb22">TPR</term><def><p>true positive rate</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>MacMahon</surname><given-names>S</given-names> </name><name name-style="western"><surname>Calverley</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chaturvedi</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Multimorbidity: a priority for global health research</article-title><year>2018</year><access-date>2026-02-20</access-date><publisher-name>The Academy of Medical Sciences</publisher-name><comment><ext-link ext-link-type="uri" 
xlink:href="https://acmedsci.ac.uk/file-download/82222577">https://acmedsci.ac.uk/file-download/82222577</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Skou</surname><given-names>ST</given-names> </name><name name-style="western"><surname>Mair</surname><given-names>FS</given-names> </name><name name-style="western"><surname>Fortin</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Multimorbidity</article-title><source>Nat Rev Dis Primers</source><year>2022</year><month>07</month><day>14</day><volume>8</volume><issue>1</issue><fpage>48</fpage><pub-id pub-id-type="doi">10.1038/s41572-022-00376-4</pub-id><pub-id pub-id-type="medline">35835758</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Du</surname><given-names>Y</given-names> </name><name name-style="western"><surname>de Bock</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Vonk</surname><given-names>JM</given-names> </name><etal/></person-group><article-title>Lifestyle factors and incident multimorbidity related to chronic disease: a population-based cohort study</article-title><source>Eur J Ageing</source><year>2024</year><month>11</month><day>28</day><volume>21</volume><issue>1</issue><fpage>37</fpage><pub-id pub-id-type="doi">10.1007/s10433-024-00833-x</pub-id><pub-id pub-id-type="medline">39609306</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nunes</surname><given-names>BP</given-names> </name><name name-style="western"><surname>Flores</surname><given-names>TR</given-names> </name><name 
name-style="western"><surname>Mielke</surname><given-names>GI</given-names> </name><name name-style="western"><surname>Thum&#x00E9;</surname><given-names>E</given-names> </name><name name-style="western"><surname>Facchini</surname><given-names>LA</given-names> </name></person-group><article-title>Multimorbidity and mortality in older adults: a systematic review and meta-analysis</article-title><source>Arch Gerontol Geriatr</source><year>2016</year><volume>67</volume><fpage>130</fpage><lpage>138</lpage><pub-id pub-id-type="doi">10.1016/j.archger.2016.07.008</pub-id><pub-id pub-id-type="medline">27500661</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fortin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lapointe</surname><given-names>L</given-names> </name><name name-style="western"><surname>Hudon</surname><given-names>C</given-names> </name><name name-style="western"><surname>Vanasse</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ntetu</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Maltais</surname><given-names>D</given-names> </name></person-group><article-title>Multimorbidity and quality of life in primary care: a systematic review</article-title><source>Health Qual Life Outcomes</source><year>2004</year><month>09</month><day>20</day><volume>2</volume><issue>1</issue><fpage>51</fpage><pub-id pub-id-type="doi">10.1186/1477-7525-2-51</pub-id><pub-id pub-id-type="medline">15380021</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Violan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Foguet-Boreu</surname><given-names>Q</given-names> </name><name 
name-style="western"><surname>Flores-Mateo</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Prevalence, determinants and patterns of multimorbidity in primary care: a systematic review of observational studies</article-title><source>PLoS One</source><year>2014</year><volume>9</volume><issue>7</issue><fpage>e102149</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0102149</pub-id><pub-id pub-id-type="medline">25048354</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Prados-Torres</surname><given-names>A</given-names> </name><name name-style="western"><surname>Calder&#x00F3;n-Larra&#x00F1;aga</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hancco-Saavedra</surname><given-names>J</given-names> </name><name name-style="western"><surname>Poblador-Plou</surname><given-names>B</given-names> </name><name name-style="western"><surname>van den Akker</surname><given-names>M</given-names> </name></person-group><article-title>Multimorbidity patterns: a systematic review</article-title><source>J Clin Epidemiol</source><year>2014</year><month>03</month><volume>67</volume><issue>3</issue><fpage>254</fpage><lpage>266</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2013.09.021</pub-id><pub-id pub-id-type="medline">24472295</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dong</surname><given-names>X</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>P</given-names> </name></person-group><article-title>Individual-level transitions between chronic disease 
multimorbidity clusters and the risk of five-year mortality in longitudinal cohort of Chinese middle-aged and older adults</article-title><source>Aging Clin Exp Res</source><year>2025</year><month>07</month><day>9</day><volume>37</volume><issue>1</issue><fpage>216</fpage><pub-id pub-id-type="doi">10.1007/s40520-025-03078-5</pub-id><pub-id pub-id-type="medline">40632375</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Muth</surname><given-names>C</given-names> </name><name name-style="western"><surname>Blom</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>SM</given-names> </name><etal/></person-group><article-title>Evidence supporting the best clinical management of patients with multimorbidity and polypharmacy: a systematic guideline review and expert consensus</article-title><source>J Intern Med</source><year>2019</year><month>03</month><volume>285</volume><issue>3</issue><fpage>272</fpage><lpage>288</lpage><pub-id pub-id-type="doi">10.1111/joim.12842</pub-id><pub-id pub-id-type="medline">30357955</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ba</surname><given-names>Z</given-names> </name></person-group><article-title>Extracting multifaceted characteristics of patients with chronic disease comorbidity: framework development using large language models</article-title><source>JMIR Med 
Inform</source><year>2025</year><month>05</month><day>15</day><volume>13</volume><issue>1</issue><fpage>e70096</fpage><pub-id pub-id-type="doi">10.2196/70096</pub-id><pub-id pub-id-type="medline">40373298</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Held</surname><given-names>FP</given-names> </name><name name-style="western"><surname>Blyth</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gnjidic</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Association rules analysis of comorbidity and multimorbidity: the concord health and aging in men project</article-title><source>J Gerontol A Biol Sci Med Sci</source><year>2016</year><month>05</month><volume>71</volume><issue>5</issue><fpage>625</fpage><lpage>631</lpage><pub-id pub-id-type="doi">10.1093/gerona/glv181</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>He</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ou</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>Prevalence of common chronic disease and multimorbidity patterns in Guangdong province with three typical cultures: analysis of data from the Diverse Life-Course Cohort study</article-title><source>Front Public Health</source><year>2023</year><month>05</month><day>4</day><volume>11</volume><fpage>1163791</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2023.1163791</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ng</surname><given-names>SK</given-names> </name><name 
name-style="western"><surname>Tawiah</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sawyer</surname><given-names>M</given-names> </name><name name-style="western"><surname>Scuffham</surname><given-names>P</given-names> </name></person-group><article-title>Patterns of multimorbid health conditions: a systematic review of analytical methods and comparison analysis</article-title><source>Int J Epidemiol</source><year>2018</year><month>10</month><day>1</day><volume>47</volume><issue>5</issue><fpage>1687</fpage><lpage>1704</lpage><pub-id pub-id-type="doi">10.1093/ije/dyy134</pub-id><pub-id pub-id-type="medline">30016472</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>He</surname><given-names>P</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>H</given-names> </name><etal/></person-group><article-title>A network-based study reveals multimorbidity patterns in people with type 2 diabetes</article-title><source>iScience</source><year>2023</year><month>10</month><volume>26</volume><issue>10</issue><fpage>107979</fpage><pub-id pub-id-type="doi">10.1016/j.isci.2023.107979</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nicolet</surname><given-names>A</given-names> </name><name name-style="western"><surname>Assouline</surname><given-names>D</given-names> </name><name name-style="western"><surname>Le Pogam</surname><given-names>MA</given-names> </name><etal/></person-group><article-title>Exploring patient multimorbidity and complexity using health insurance claims data: a cluster analysis approach</article-title><source>JMIR Med 
Inform</source><year>2022</year><month>04</month><day>4</day><volume>10</volume><issue>4</issue><fpage>e34274</fpage><pub-id pub-id-type="doi">10.2196/34274</pub-id><pub-id pub-id-type="medline">35377334</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>X</given-names> </name></person-group><article-title>What can we learn from multimorbidity? A deep dive from its risk patterns to the corresponding patient profiles</article-title><source>Decis Support Syst</source><year>2024</year><month>11</month><volume>186</volume><fpage>114313</fpage><pub-id pub-id-type="doi">10.1016/j.dss.2024.114313</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Duan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Fang</surname><given-names>Y</given-names> </name></person-group><article-title>Identifying and predicting physical limitation and cognitive decline trajectory group of older adults in China: a data-driven machine learning analysis</article-title><source>J Affect Disord</source><year>2024</year><month>04</month><day>1</day><volume>350</volume><fpage>590</fpage><lpage>599</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2024.01.095</pub-id><pub-id 
pub-id-type="medline">38218258</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Strauss</surname><given-names>VY</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>PW</given-names> </name><name name-style="western"><surname>Kadam</surname><given-names>UT</given-names> </name><name name-style="western"><surname>Jordan</surname><given-names>KP</given-names> </name></person-group><article-title>Distinct trajectories of multimorbidity in primary care were identified using latent class growth analysis</article-title><source>J Clin Epidemiol</source><year>2014</year><month>10</month><volume>67</volume><issue>10</issue><fpage>1163</fpage><lpage>1171</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2014.06.003</pub-id><pub-id pub-id-type="medline">25063556</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zacar&#x00ED;as-Pons</surname><given-names>L</given-names> </name><name name-style="western"><surname>Vilalta-Franch</surname><given-names>J</given-names> </name><name name-style="western"><surname>Turr&#x00F3;-Garriga</surname><given-names>O</given-names> </name><name name-style="western"><surname>Saez</surname><given-names>M</given-names> </name><name name-style="western"><surname>Garre-Olmo</surname><given-names>J</given-names> </name></person-group><article-title>Multimorbidity patterns and their related characteristics in European older adults: a longitudinal perspective</article-title><source>Arch Gerontol Geriatr</source><year>2021</year><volume>95</volume><fpage>104428</fpage><pub-id pub-id-type="doi">10.1016/j.archger.2021.104428</pub-id><pub-id pub-id-type="medline">33991948</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation 
citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Lanza</surname><given-names>ST</given-names> </name></person-group><source>Latent Class and Latent Transition Analysis: With Applications in the Social, Behavioral, and Health Sciences</source><year>2009</year><publisher-name>John Wiley &#x0026; Sons</publisher-name><pub-id pub-id-type="other">978-0-470-22839-5</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Latent transition analysis of instrumental activities of daily living in Chinese elderly: based on the 2014&#x2013;2018 wave of the Chinese Longitudinal Healthy Longevity Survey</article-title><source>BMC Geriatr</source><year>2024</year><month>01</month><day>22</day><volume>24</volume><issue>1</issue><fpage>83</fpage><pub-id pub-id-type="doi">10.1186/s12877-023-04631-5</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Paukner</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ladner</surname><given-names>DP</given-names> </name><collab>CAPriCORN Team</collab><name name-style="western"><surname>Zhao</surname><given-names>L</given-names> </name></person-group><article-title>Measuring disease burden with individual cumulative incidence in patients with cirrhosis</article-title><source>J Biomed Inform</source><year>2025</year><month>09</month><volume>169</volume><fpage>104883</fpage><pub-id 
pub-id-type="doi">10.1016/j.jbi.2025.104883</pub-id><pub-id pub-id-type="medline">40752671</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zeng</surname><given-names>H</given-names> </name><name name-style="western"><surname>Miao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Influence of comorbidity of chronic diseases on basic activities of daily living among older adults in China: a propensity score-matched study</article-title><source>Front Public Health</source><year>2024</year><volume>12</volume><fpage>1292289</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2024.1292289</pub-id><pub-id pub-id-type="medline">38638478</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Uddin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hajati</surname><given-names>F</given-names> </name><name name-style="western"><surname>Khushi</surname><given-names>M</given-names> </name></person-group><article-title>Comorbidity and multimorbidity prediction of major chronic diseases using machine learning and network analytics</article-title><source>Expert Syst Appl</source><year>2022</year><month>11</month><volume>205</volume><fpage>117761</fpage><pub-id pub-id-type="doi">10.1016/j.eswa.2022.117761</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yoo</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cha</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Paik</surname><given-names>H</given-names> </name></person-group><article-title>Identifying the risk of sepsis in patients with cancer using digital health care records: machine learning-based approach</article-title><source>JMIR Med Inform</source><year>2022</year><month>06</month><day>15</day><volume>10</volume><issue>6</issue><fpage>e37689</fpage><pub-id pub-id-type="doi">10.2196/37689</pub-id><pub-id pub-id-type="medline">35704364</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nylund-Gibson</surname><given-names>K</given-names> </name><name name-style="western"><surname>Grimm</surname><given-names>R</given-names> </name><name name-style="western"><surname>Quirk</surname><given-names>M</given-names> </name><name name-style="western"><surname>Furlong</surname><given-names>M</given-names> </name></person-group><article-title>A latent transition mixture model using the three-step specification</article-title><source>Struct Equ Modeling</source><year>2014</year><month>07</month><day>3</day><volume>21</volume><issue>3</issue><fpage>439</fpage><lpage>454</lpage><pub-id pub-id-type="doi">10.1080/10705511.2014.915375</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vrieze</surname><given-names>SI</given-names> </name></person-group><article-title>Model selection and psychological theory: a discussion of the differences 
between the Akaike information criterion (AIC) and the Bayesian information criterion (BIC)</article-title><source>Psychol Methods</source><year>2012</year><month>06</month><volume>17</volume><issue>2</issue><fpage>228</fpage><lpage>243</lpage><pub-id pub-id-type="doi">10.1037/a0027127</pub-id><pub-id pub-id-type="medline">22309957</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lorah</surname><given-names>J</given-names> </name><name name-style="western"><surname>Womack</surname><given-names>A</given-names> </name></person-group><article-title>Value of sample size for computation of the Bayesian information criterion (BIC) in multilevel modeling</article-title><source>Behav Res Methods</source><year>2019</year><month>02</month><volume>51</volume><issue>1</issue><fpage>440</fpage><lpage>450</lpage><pub-id pub-id-type="doi">10.3758/s13428-018-1188-3</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sclove</surname><given-names>SL</given-names> </name></person-group><article-title>Application of model-selection criteria to some problems in multivariate analysis</article-title><source>Psychometrika</source><year>1987</year><month>09</month><access-date>2025-08-01</access-date><volume>52</volume><issue>3</issue><fpage>333</fpage><lpage>343</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.cambridge.org/core/journals/psychometrika/article/application-of-modelselection-criteria-to-some-problems-in-multivariate-analysis/C610E1E1D7EEF9338AE19EC9E7AE1F7C">https://www.cambridge.org/core/journals/psychometrika/article/application-of-modelselection-criteria-to-some-problems-in-multivariate-analysis/C610E1E1D7EEF9338AE19EC9E7AE1F7C</ext-link></comment><pub-id 
pub-id-type="doi">10.1007/BF02294360</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Larose</surname><given-names>C</given-names> </name><name name-style="western"><surname>Harel</surname><given-names>O</given-names> </name><name name-style="western"><surname>Kordas</surname><given-names>K</given-names> </name><name name-style="western"><surname>Dey</surname><given-names>DK</given-names> </name></person-group><article-title>Latent class analysis of incomplete data via an entropy-based criterion</article-title><source>Stat Methodol</source><year>2016</year><month>09</month><volume>32</volume><fpage>107</fpage><lpage>121</lpage><pub-id pub-id-type="doi">10.1016/j.stamet.2016.04.004</pub-id><pub-id pub-id-type="medline">27695391</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Duffy</surname><given-names>RD</given-names> </name></person-group><article-title>Using latent transition analysis to explore changes in decent work across time</article-title><source>J Couns Psychol</source><year>2025</year><month>01</month><volume>72</volume><issue>1</issue><fpage>80</fpage><lpage>91</lpage><pub-id pub-id-type="doi">10.1037/cou0000773</pub-id><pub-id pub-id-type="medline">39680017</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dziak</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Lanza</surname><given-names>ST</given-names> </name><name 
name-style="western"><surname>Tan</surname><given-names>X</given-names> </name></person-group><article-title>Effect size, statistical power and sample size requirements for the bootstrap likelihood ratio test in latent class analysis</article-title><source>Struct Equ Modeling</source><year>2014</year><month>01</month><day>1</day><volume>21</volume><issue>4</issue><fpage>534</fpage><lpage>552</lpage><pub-id pub-id-type="doi">10.1080/10705511.2014.919819</pub-id><pub-id pub-id-type="medline">25328371</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rinne</surname><given-names>LF</given-names> </name><name name-style="western"><surname>Ye</surname><given-names>A</given-names> </name><name name-style="western"><surname>Jordan</surname><given-names>NC</given-names> </name></person-group><article-title>Development of fraction comparison strategies: a latent transition analysis</article-title><source>Dev Psychol</source><year>2017</year><month>04</month><volume>53</volume><issue>4</issue><fpage>713</fpage><lpage>730</lpage><pub-id pub-id-type="doi">10.1037/dev0000275</pub-id><pub-id pub-id-type="medline">28221051</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kalantar</surname><given-names>B</given-names> </name><name name-style="western"><surname>Pradhan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Naghibi</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Motevalli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mansor</surname><given-names>S</given-names> </name></person-group><article-title>Assessment of the effects of training data selection on the landslide susceptibility mapping: a comparison between support 
vector machine (SVM), logistic regression (LR) and artificial neural networks (ANN)</article-title><source>Geomat Nat Hazards Risk</source><year>2018</year><month>01</month><day>1</day><volume>9</volume><issue>1</issue><fpage>49</fpage><lpage>69</lpage><pub-id pub-id-type="doi">10.1080/19475705.2017.1407368</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Belgiu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Dr&#x0103;gu&#x0163;</surname><given-names>L</given-names> </name></person-group><article-title>Random forest in remote sensing: a review of applications and future directions</article-title><source>ISPRS J Photogramm Remote Sens</source><year>2016</year><month>04</month><volume>114</volume><fpage>24</fpage><lpage>31</lpage><pub-id pub-id-type="doi">10.1016/j.isprsjprs.2016.01.011</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ogunleye</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>QG</given-names> </name></person-group><article-title>XGBoost model for chronic kidney disease diagnosis</article-title><source>IEEE/ACM Trans Comput Biol Bioinform</source><year>2020</year><volume>17</volume><issue>6</issue><fpage>2131</fpage><lpage>2140</lpage><pub-id pub-id-type="doi">10.1109/TCBB.2019.2911071</pub-id><pub-id pub-id-type="medline">30998478</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>LeCun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bottou</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Bengio</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Haffner</surname><given-names>P</given-names> </name></person-group><article-title>Gradient-based learning applied to document recognition</article-title><source>Proc IEEE</source><year>1998</year><volume>86</volume><issue>11</issue><fpage>2278</fpage><lpage>2324</lpage><pub-id pub-id-type="doi">10.1109/5.726791</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hochreiter</surname><given-names>S</given-names> </name><name name-style="western"><surname>Schmidhuber</surname><given-names>J</given-names> </name></person-group><article-title>Long short-term memory</article-title><source>Neural Comput</source><year>1997</year><month>11</month><day>15</day><volume>9</volume><issue>8</issue><fpage>1735</fpage><lpage>1780</lpage><pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id><pub-id pub-id-type="medline">9377276</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Vaswani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names> </name><name name-style="western"><surname>Parmar</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Attention is all you need</article-title><access-date>2025-09-09</access-date><conf-name>31st Conference on Neural Information Processing Systems (NIPS 2017)</conf-name><conf-date>Dec 4-9, 2017</conf-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://proceedings.neurips.cc/paper_files/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html">https://proceedings.neurips.cc/paper_files/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html</ext-link></comment></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Nie</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Sinthong</surname><given-names>P</given-names> </name><name name-style="western"><surname>Kalagnanam</surname><given-names>J</given-names> </name></person-group><article-title>A time series is worth 64 words: long-term forecasting with transformers</article-title><source>arXiv</source><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2211.14730</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><etal/></person-group><article-title>iTransformer: inverted transformers are effective for time series forecasting</article-title><source>arXiv</source><comment>Preprint posted online on 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2310.06625</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Gu</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dao</surname><given-names>T</given-names> </name></person-group><article-title>Mamba: linear-time sequence modeling 
with selective state spaces</article-title><source>arXiv</source><comment>Preprint posted online on 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2312.00752</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Cai</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>Y</given-names> </name></person-group><article-title>MambaTS: improved selective state space models for long-term time series forecasting</article-title><source>arXiv</source><comment>Preprint posted online on 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2405.16440</pub-id><pub-id pub-id-type="medline">38800661</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stekhoven</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>B&#x00FC;hlmann</surname><given-names>P</given-names> </name></person-group><article-title>MissForest&#x2014;non-parametric missing value imputation for mixed-type data</article-title><source>Bioinformatics</source><year>2012</year><month>01</month><day>1</day><volume>28</volume><issue>1</issue><fpage>112</fpage><lpage>118</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btr597</pub-id><pub-id pub-id-type="medline">22039212</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>T</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>D</given-names> </name></person-group><article-title>A comparison 
of full information maximum likelihood and multiple imputation in structural equation modeling with missing data</article-title><source>Psychol Methods</source><year>2021</year><month>08</month><volume>26</volume><issue>4</issue><fpage>466</fpage><lpage>485</lpage><pub-id pub-id-type="doi">10.1037/met0000381</pub-id><pub-id pub-id-type="medline">33507765</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>B</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>D</given-names> </name><etal/></person-group><article-title>A machine-learning-based approach for identifying diagnostic errors in electronic medical records</article-title><source>IEEE Trans Rel</source><year>2024</year><month>06</month><volume>73</volume><issue>2</issue><fpage>1172</fpage><lpage>1186</lpage><pub-id pub-id-type="doi">10.1109/TR.2023.3330733</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Bioinformatics analysis of comorbid mechanisms between ischemic stroke and end stage renal disease</article-title><source>Sci Rep</source><year>2025</year><month>05</month><day>16</day><volume>15</volume><issue>1</issue><fpage>17060</fpage><pub-id pub-id-type="doi">10.1038/s41598-025-01049-4</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Xi</surname><given-names>J</given-names> </name><name name-style="western"><surname>Miao</surname><given-names>M</given-names> </name><name name-style="western"><surname>Li</surname><given-names>PWC</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>DSF</given-names> </name></person-group><article-title>Prognostic effects of multimorbidity clusters on health outcomes in adults: a systematic review and meta-analysis</article-title><source>Ageing Res Rev</source><year>2025</year><month>12</month><volume>112</volume><fpage>102897</fpage><pub-id pub-id-type="doi">10.1016/j.arr.2025.102897</pub-id><pub-id pub-id-type="medline">40934974</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garin</surname><given-names>N</given-names> </name><name name-style="western"><surname>Koyanagi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chatterji</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Global multimorbidity patterns: a cross-sectional, population-based, multi-country study</article-title><source>J Gerontol A Biol Sci Med Sci</source><year>2016</year><month>02</month><volume>71</volume><issue>2</issue><fpage>205</fpage><lpage>214</lpage><pub-id pub-id-type="doi">10.1093/gerona/glv128</pub-id><pub-id pub-id-type="medline">26419978</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Canoy</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tran</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zottoli</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Association between cardiometabolic disease multimorbidity and 
all-cause mortality in 2 million women and men registered in UK general practices</article-title><source>BMC Med</source><year>2021</year><month>10</month><day>28</day><volume>19</volume><issue>1</issue><fpage>258</fpage><pub-id pub-id-type="doi">10.1186/s12916-021-02126-x</pub-id><pub-id pub-id-type="medline">34706724</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ching</surname><given-names>K</given-names> </name><name name-style="western"><surname>Houard</surname><given-names>X</given-names> </name><name name-style="western"><surname>Berenbaum</surname><given-names>F</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>C</given-names> </name></person-group><article-title>Hypertension meets osteoarthritis - revisiting the vascular aetiology hypothesis</article-title><source>Nat Rev Rheumatol</source><year>2021</year><month>09</month><volume>17</volume><issue>9</issue><fpage>533</fpage><lpage>549</lpage><pub-id pub-id-type="doi">10.1038/s41584-021-00650-x</pub-id><pub-id pub-id-type="medline">34316066</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Katz</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bartels</surname><given-names>CM</given-names> </name></person-group><article-title>Multimorbidity in rheumatoid arthritis: literature review and future directions</article-title><source>Curr Rheumatol Rep</source><year>2024</year><month>01</month><volume>26</volume><issue>1</issue><fpage>24</fpage><lpage>35</lpage><pub-id pub-id-type="doi">10.1007/s11926-023-01121-w</pub-id><pub-id pub-id-type="medline">37995046</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Adler</surname><given-names>J</given-names> </name><name name-style="western"><surname>Parmryd</surname><given-names>I</given-names> </name></person-group><article-title>Quantifying colocalization by correlation: the Pearson correlation coefficient is superior to the Mander&#x2019;s overlap coefficient</article-title><source>Cytometry A</source><year>2010</year><month>08</month><volume>77</volume><issue>8</issue><fpage>733</fpage><lpage>742</lpage><pub-id pub-id-type="doi">10.1002/cyto.a.20896</pub-id><pub-id pub-id-type="medline">20653013</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nurmohamed</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Heslinga</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kitas</surname><given-names>GD</given-names> </name></person-group><article-title>Cardiovascular comorbidity in rheumatic diseases</article-title><source>Nat Rev Rheumatol</source><year>2015</year><month>12</month><volume>11</volume><issue>12</issue><fpage>693</fpage><lpage>704</lpage><pub-id pub-id-type="doi">10.1038/nrrheum.2015.112</pub-id><pub-id pub-id-type="medline">26282082</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lu</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>F</given-names> </name><name name-style="western"><surname>Yi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Tong</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>D</given-names> 
</name></person-group><article-title>Molecular signaling in temporomandibular joint osteoarthritis</article-title><source>J Orthop Translat</source><year>2022</year><month>01</month><volume>32</volume><fpage>21</fpage><lpage>27</lpage><pub-id pub-id-type="doi">10.1016/j.jot.2021.07.001</pub-id><pub-id pub-id-type="medline">35591935</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cielen</surname><given-names>N</given-names> </name><name name-style="western"><surname>Maes</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gayan-Ramirez</surname><given-names>G</given-names> </name></person-group><article-title>Musculoskeletal disorders in chronic obstructive pulmonary disease</article-title><source>Biomed Res Int</source><year>2014</year><volume>2014</volume><fpage>965764</fpage><pub-id pub-id-type="doi">10.1155/2014/965764</pub-id><pub-id pub-id-type="medline">24783225</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bozkurt</surname><given-names>B</given-names> </name><name name-style="western"><surname>Aguilar</surname><given-names>D</given-names> </name><name name-style="western"><surname>Deswal</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Contributory risk and management of comorbidities of hypertension, obesity, diabetes mellitus, hyperlipidemia, and metabolic syndrome in chronic heart failure: a scientific statement from the American Heart Association</article-title><source>Circulation</source><year>2016</year><month>12</month><day>6</day><volume>134</volume><issue>23</issue><fpage>e535</fpage><lpage>e578</lpage><pub-id pub-id-type="doi">10.1161/CIR.0000000000000450</pub-id><pub-id 
pub-id-type="medline">27799274</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zohar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cohen</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Bitterman</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Gastrointestinal comorbidities in patients with psoriatic arthritis</article-title><source>Clin Rheumatol</source><year>2016</year><month>11</month><volume>35</volume><issue>11</issue><fpage>2679</fpage><lpage>2684</lpage><pub-id pub-id-type="doi">10.1007/s10067-016-3374-y</pub-id><pub-id pub-id-type="medline">27530409</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Final dataset variables.</p><media xlink:href="medinform_v14i1e84261_app1.docx" xlink:title="DOCX File, 22 KB"/></supplementary-material></app-group></back></article>