<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e75747</article-id><article-id pub-id-type="doi">10.2196/75747</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Predicting Metabolic Dysfunction&#x2013;Associated Fatty Liver Disease Phenotypes Among Adults: 2-Stage Contrastive Learning Method</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Chen</surname><given-names>Sizhe Jasmine</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Xu</surname><given-names>Da</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hu</surname><given-names>Derek K</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hu</surname><given-names>Paul 
Jen-Hwa</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Huang</surname><given-names>Ting-Shuo</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Operations and Information Systems, David Eccles School of Business, University of Utah</institution><addr-line>1655 East Campus Center Drive</addr-line><addr-line>Salt Lake City</addr-line><addr-line>UT</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Marketing, Analytics, and Professional Sales, School of Business Administration, University of Mississippi</institution><addr-line>University</addr-line><addr-line>MS</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Biomedical Engineering and Department of Computer Engineering and Computer Science, California State University, Long Beach</institution><addr-line>Long Beach</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff4"><institution>Division of General Surgery, Department of Surgery, Jen-Ai Hospital</institution><addr-line>Taichung</addr-line><country>Taiwan</country></aff><aff id="aff5"><institution>Department of Surgery, Chang Gung Memorial Hospital, Keelung Branch</institution><addr-line>Keelung</addr-line><country>Taiwan</country></aff><aff id="aff6"><institution>Department of Chinese Medicine, College of Medicine, Chang Gung University</institution><addr-line>Taoyuan</addr-line><country>Taiwan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Lim</surname><given-names>Gilbert</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Song</surname><given-names>Jiafeng</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Paul Jen-Hwa Hu, PhD, Department of Operations and Information Systems, David Eccles School of Business, University of Utah, 1655 East Campus Center Drive, Salt Lake City, UT, United States, 1 801-587-7785; <email>paul.hu@eccles.utah.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>12</day><month>12</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e75747</elocation-id><history><date date-type="received"><day>09</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>23</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Sizhe Jasmine Chen, Da Xu, Derek K Hu, Paul Jen-Hwa Hu, Ting-Shuo Huang. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 12.12.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e75747"/><abstract><sec><title>Background</title><p>Metabolic dysfunction&#x2013;associated fatty liver disease (MAFLD) is a leading cause of chronic liver disease and can progress to liver fibrosis or hepatocellular carcinoma. Its subtypes&#x2014;obese, diabetic, and lean&#x2014;are associated with varying degrees of fibrotic burden and different complications, yet the existing analytics methods often overlook its multisystem nature, intraphenotype variability, and disease dynamics. These limitations hinder accurate risk stratification and restrict personalized intervention planning.</p></sec><sec><title>Objective</title><p>This study developed a novel, 2-stage, contrastive learning&#x2013;based method to predict the phenotype of MAFLD among adults. This method leverages multiview contrastive learning; it models individual heterogeneities and important relationships in clinical and survey-based data to predict phenotypes among adults, thus supporting clinical decision-making and personalized care.</p></sec><sec sec-type="methods"><title>Methods</title><p>Demographic, clinical, lifestyle, and genetic family history data of 4408 adults revealed how capturing essential relationships in patient data from different sources can transform individual-level representations into multiple, complementary views. Evaluation of the predictive efficacy of the proposed method in comparison with 8 prevalent methods relied on recall, precision, <italic>F</italic><sub>1</sub>-score, and area under the curve values. 
Moreover, a Shapley additive explanation analysis was performed for interpretability.</p></sec><sec sec-type="results"><title>Results</title><p>The proposed method consistently and significantly outperformed all benchmark methods. It attained the highest <italic>F</italic><sub>1</sub>-score, showing a 32.8% improvement for nondiabetic MAFLD (0.531 vs 0.400) and 30.4% improvement for diabetic MAFLD (0.519 vs 0.398) over the respective best-performing benchmark. The results underscore the clinical value and utility of integrating clinical and survey-based data in the prediction of MAFLD phenotypes among adults.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The proposed method is a viable approach for MAFLD phenotype prediction. It is more effective in identifying at-risk adults than many prevalent data-driven analytics methods and thereby can enhance clinical decision-making and support patient-centric care and management.</p></sec></abstract><kwd-group><kwd>metabolic dysfunction&#x2013;associated fatty liver disease</kwd><kwd>phenotype</kwd><kwd>graph representation learning</kwd><kwd>multiview contrastive learning</kwd><kwd>predictive analytics</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Metabolic dysfunction&#x2013;associated fatty liver disease (MAFLD) is a leading cause of chronic liver disease, affecting more than one-third of the global population [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>] and resulting in annual, direct medical costs of US $103 billion in the United States and &#x20AC;35 billion (US $40 billion) in Europe [<xref ref-type="bibr" rid="ref3">3</xref>]. The relabeling of nonalcoholic fatty liver disease as MAFLD reflects a deeper understanding of fatty liver disease [<xref ref-type="bibr" rid="ref4">4</xref>]. 
It also helps identify adults at risk of serious prognoses [<xref ref-type="bibr" rid="ref5">5</xref>] such as liver cirrhosis and hepatocellular carcinoma, which account for most liver-related deaths [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. The exacerbation of comorbid conditions due to MAFLD amplifies its clinical significance; patients with chronic liver diseases often develop severe infections, chronic cardiovascular or kidney disease, or cancer and face an increased risk of death [<xref ref-type="bibr" rid="ref8">8</xref>]. Yet, therapeutic options for devastating MAFLD-induced liver diseases are limited. Liver transplantation is the optimal treatment [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>] but is greatly restricted by organ availability and financial costs [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>A diagnosis of MAFLD requires hepatic steatosis in the presence of excessive weight, type 2 diabetes mellitus, or metabolic dysregulation, manifested in the obese, diabetic, and lean phenotypes (subtypes) of MAFLD, respectively [<xref ref-type="bibr" rid="ref12">12</xref>]. These phenotypes have distinct prognostic values [<xref ref-type="bibr" rid="ref5">5</xref>], fibrotic burden [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], and complications [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. For example, the diabetic phenotype is characterized by severe insulin resistance and is associated with the highest risk of all-cause and disease-specific mortality [<xref ref-type="bibr" rid="ref17">17</xref>]. The obese phenotype is related to lifestyle factors (eg, diet and physical inactivity) and can lead to systemic inflammation and metabolic dysfunction. 
The lean phenotype involves ectopic fat deposition and genetic predispositions to MAFLD, although without obesity [<xref ref-type="bibr" rid="ref18">18</xref>]. Because of the differences between the MAFLD phenotypes, accurate phenotype prediction is crucial for clinical decision-making, personalized care planning, and efficient resource allocation [<xref ref-type="bibr" rid="ref19">19</xref>]. With relevant insights into the underlying etiology and pathology [<xref ref-type="bibr" rid="ref20">20</xref>], effective phenotype prediction can facilitate patient stratification and treatment planning for streamlining diagnostic procedures, optimizing the use of laboratory tests or imaging, and specifying necessary lifestyle changes, all of which have cost-containment implications [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>].</p><p>Physicians usually rely on liver biopsies [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>] or score-based methods [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>] that require contemporaneous clinical data, impose substantial costs, and misidentify at-risk adults. These constraints favor the potential of data-driven analytics for supporting timely identification of at-risk adults such that clinicians can formulate actionable risk reduction measures and effective patient stratification and researchers can design more appropriate clinical trials and treatment plans [<xref ref-type="bibr" rid="ref28">28</xref>]. Despite the promise, data-driven analytics for MAFLD phenotyping face several challenges. First, MAFLD is a multisystem disease [<xref ref-type="bibr" rid="ref29">29</xref>] because clinical, family genetics, lifestyle, and socioeconomic factors can influence fatty liver development and progression [<xref ref-type="bibr" rid="ref30">30</xref>]. 
Incorporating such heterogeneous data, which typically are gathered from different sources, in analytics methods is difficult. For example, surveys designed to gather genetic family history data or lifestyle data tend to have small samples and often suffer from data incompleteness. Second, due to the complex nature of MAFLD, people with the same MAFLD phenotype may exhibit intraphenotype variability in etiology or pathology, which also should be considered for phenotype predictions. Third, both disease classification hierarchy and manifestations of MAFLD involve temporal complexity at the individual level.</p></sec><sec id="s1-2"><title>Objective</title><p>In an effort to design a data-driven method to predict MAFLD phenotypes more accurately, we developed a novel, 2-stage, contrastive learning&#x2013;based method. This method leverages graph representation learning, in combination with interindividual similarity, to process integrated individual-level data pertaining to genetic family history or lifestyle, which then can inform downstream predictions by complementing (incomplete) survey-based data with clinical data or vice versa. In addition, the proposed method incorporates multiview, contrastive pretraining that captures intraphenotype variability on the basis of clinical, genetic family history, and lifestyle data. By linking important data from different sources, it constructs individual-level representations for downstream tasks and predictions. Finally, its 2-stage estimation design accounts for disease hierarchy and temporal complexity, such that the proposed method can predict phenotypes among adults more accurately and explicitly than the existing analytics methods.</p><p>To demonstrate the predictive efficacy of the proposed method, we used clinical and survey-based data of 4408 adults in Taiwan [<xref ref-type="bibr" rid="ref31">31</xref>] and included 8 prevalent methods as benchmarks. 
The results indicated that the proposed method consistently and significantly outperformed all the benchmarks in both <italic>F</italic><sub>1</sub>-score and area under the curve (AUC). This novel method can predict phenotypes accurately and can potentially contribute to medical informatics research and support personalized care for at-risk adults.</p></sec><sec id="s1-3"><title>Related Work</title><sec id="s1-3-1"><title>MAFLD and Its Phenotypes</title><p>Clinically, MAFLD involves metabolic abnormalities [<xref ref-type="bibr" rid="ref32">32</xref>], and its diagnosis requires hepatic steatosis, which can be determined by imaging, blood biomarker scores, or liver biopsies [<xref ref-type="bibr" rid="ref20">20</xref>]. Adults diagnosed with MAFLD often differ in their phenotypes, prognoses, and complications [<xref ref-type="bibr" rid="ref5">5</xref>], leading to distinct clinical manifestations and metabolic characteristics. For example, diabetic MAFLD is characterized by diabetes mellitus, independent of BMI, and exhibits a higher fibrotic burden than other phenotypes, with substantial risks of hepatocellular carcinoma [<xref ref-type="bibr" rid="ref33">33</xref>] and cardiovascular disease (CVD) [<xref ref-type="bibr" rid="ref15">15</xref>]. Both obese MAFLD and lean MAFLD are determined on the basis of BMI: &#x2265;23 kg/m&#x00B2; and &#x003C;23 kg/m&#x00B2;, respectively. The former condition involves excess adiposity and is associated with insulin resistance, systemic inflammation, and increased risk of cardiovascular complications [<xref ref-type="bibr" rid="ref34">34</xref>]. The latter, also known as metabolic dysregulation, is characterized by metabolic abnormalities, and individuals with this phenotype are at a greater risk of liver-related complications and mortality [<xref ref-type="bibr" rid="ref21">21</xref>]. Because both obese MAFLD and lean MAFLD are determined on the basis of BMI, they can be considered in combination for phenotype predictions. 
Phenotypic heterogeneity reflects the significant complexity of MAFLD and its varied pathophysiological mechanisms [<xref ref-type="bibr" rid="ref35">35</xref>], which stem from demographic characteristics, clinical variables, lifestyle factors, and genetic predisposition [<xref ref-type="bibr" rid="ref36">36</xref>].</p><p>In turn, the heterogeneity and complexity of MAFLD make timely, accurate phenotype prediction important but difficult. Notably, MAFLD is reversible in its early stages, with appropriate lifestyle changes and clinical interventions [<xref ref-type="bibr" rid="ref35">35</xref>]. On the other hand, advanced stages can induce liver diseases and are associated with poor prognoses [<xref ref-type="bibr" rid="ref37">37</xref>]. In general, accurate phenotype predictions are needed within a 1-year timeframe [<xref ref-type="bibr" rid="ref38">38</xref>] because MAFLD often exhibits few or no directly observable symptoms until liver damage has occurred. By identifying at-risk adults in a timely manner, physicians can encourage lifestyle changes such as dietary alterations or reduced alcohol consumption [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>] and plan for laboratory tests or imaging examinations (eg, abdominal ultrasound) [<xref ref-type="bibr" rid="ref36">36</xref>].</p></sec><sec id="s1-3-2"><title>Data-Driven Analytics Methods for Patient Risk and Outcome Predictions</title><p>Existing data-driven analytics for MAFLD phenotype predictions rely on regression-based [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref43">43</xref>], tree-based [<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref46">46</xref>], neural network (NN)&#x2013;based [<xref ref-type="bibr" rid="ref47">47</xref>-<xref ref-type="bibr" rid="ref49">49</xref>], or graph-based [<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref52">52</xref>] methods. 
Regression-based methods, such as Cox regression&#x2013;based risk estimation [<xref ref-type="bibr" rid="ref42">42</xref>] and logistic regression models [<xref ref-type="bibr" rid="ref43">43</xref>], use statistical modeling to predict patient risk and outcomes, support patient risk predictions, and identify important factors. However, these methods cannot deal with high-dimensional data or nonlinear relationships and often make strong data property assumptions. A tree-based method can model nonlinear relationships and derive predictions by applying variable values to split the data recursively, as exemplified by decision tree (DT) [<xref ref-type="bibr" rid="ref44">44</xref>], random forest (RF) [<xref ref-type="bibr" rid="ref45">45</xref>], and extreme gradient boosting (XGBoost) [<xref ref-type="bibr" rid="ref46">46</xref>] methods. While intuitive and interpretable, tree-based methods struggle with overfitting in the presence of noise or data sparsity, and they cannot handle missing data or individual heterogeneity effectively [<xref ref-type="bibr" rid="ref53">53</xref>]. The deep learning, NN-based methods are able to model complex relationships and nonlinear interactions [<xref ref-type="bibr" rid="ref54">54</xref>]. For example, deep autoencoders [<xref ref-type="bibr" rid="ref49">49</xref>] and multilayer perceptron (MLP) [<xref ref-type="bibr" rid="ref48">48</xref>] methods are advantageous for representing multisource data with high-dimensional features. But they can be difficult to train and are prone to overfitting, especially with insufficient, incomplete, or low-quality data [<xref ref-type="bibr" rid="ref55">55</xref>]. Finally, graph-based methods represent data as nodes and edges in a graph; they are designed to capture complex relationships and interactions among entities (eg, patients and medications) to inform downstream predictions. 
Representative methods include graph convolutional networks (GCNs) [<xref ref-type="bibr" rid="ref56">56</xref>], graph attention networks (GATs) [<xref ref-type="bibr" rid="ref57">57</xref>], and GraphSAGE [<xref ref-type="bibr" rid="ref58">58</xref>]. Despite their general effectiveness, graph-based methods rely on predefined graph structures, which can restrict their ability to account for complex, multifaceted, individual feature interactions.</p><p>As summarized in <xref ref-type="table" rid="table1">Table 1</xref>, the existing analytics methods seem generally effective for estimating patient risk and outcome, but their direct use for MAFLD phenotype prediction is insufficient for several reasons. First, many methods depend on clinical data available in electronic health records, which prevents them from accounting for the multifactorial nature of MAFLD. For example, effective phenotype prediction needs to consider genetic family history and lifestyle data, but the incorporation of such data complicates the modeling and obscures patterns essential for accurate prediction, in addition to sample size and data incompleteness issues. Second, most of the prevalent methods do not capture intraphenotype variability, which is critical for downstream predictions. For example, semisupervised (eg, contrastive) learning can deal with complex representations [<xref ref-type="bibr" rid="ref59">59</xref>-<xref ref-type="bibr" rid="ref61">61</xref>], but its use requires data augmentation [<xref ref-type="bibr" rid="ref62">62</xref>-<xref ref-type="bibr" rid="ref65">65</xref>] and complementary views [<xref ref-type="bibr" rid="ref66">66</xref>], in addition to the tabular data common in healthcare settings. Third, MAFLD phenotype prediction involves disease classification hierarchy and temporal dynamics. 
For instance, individuals are classified as those with and without MAFLD (MAFLD and non-MAFLD, respectively), and those with MAFLD need to be further classified into distinct phenotypes by a selective layer, which implies a priori knowledge to inform appropriate feature selection.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of this study with representative previous studies.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Study</td><td align="left" valign="bottom">Method</td><td align="left" valign="bottom">Multisource<break/>data integration</td><td align="left" valign="bottom">Data heterogeneity</td><td align="left" valign="bottom">Intraphenotype variability</td><td align="left" valign="bottom">Disease dynamics</td></tr></thead><tbody><tr><td align="left" valign="top">Jia et al (2019) [<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">Regression-based</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Yang et al (2024) [<xref ref-type="bibr" rid="ref67">67</xref>]</td><td align="left" valign="top">Regression-based</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Ksi&#x0105;&#x017C;ek et al (2021) [<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">Regression-based</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Pasadana et al (2021) [<xref ref-type="bibr" rid="ref68">68</xref>]</td><td align="left" valign="top">Tree-based</td><td align="left" valign="top">No</td><td align="left" 
valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Wang et al (2019) [<xref ref-type="bibr" rid="ref69">69</xref>]</td><td align="left" valign="top">Tree-based</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td></tr><tr><td align="left" valign="top">Hashem et al (2012) [<xref ref-type="bibr" rid="ref70">70</xref>]</td><td align="left" valign="top">NN-based<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td></tr><tr><td align="left" valign="top">Franco et al (2021) [<xref ref-type="bibr" rid="ref49">49</xref>]</td><td align="left" valign="top">NN-based</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Chowdhury et al (2024) [<xref ref-type="bibr" rid="ref51">51</xref>]</td><td align="left" valign="top">Graph-based</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Zhang et al (2022) [<xref ref-type="bibr" rid="ref52">52</xref>]</td><td align="left" valign="top">Graph-based</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Zheng et al (2022) [<xref ref-type="bibr" rid="ref71">71</xref>]</td><td align="left" valign="top">Graph-based</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td></tr><tr><td align="left" 
valign="top">This study</td><td align="left" valign="top">2-Stage, contrastive learning&#x2013;based</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>NN: neural network.</p></fn></table-wrap-foot></table-wrap></sec></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Materials</title><p>We used 2-year longitudinal data of 4408 adults, obtained from a major healthcare organization in Taiwan, to evaluate the proposed method in comparison with 8 prevalent methods. No adults in the sample had MAFLD in year 1. For each person, the data include 2 demographic variables, 36 clinical variables, 32 lifestyle variables, and 42 genetic family history&#x2013;related variables. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provides the description and coding of variables. With these data, we evaluated the ability of each method to predict whether a person would develop MAFLD in year 2 and, if so, of which phenotype.</p><p>Of the 4408 individuals in our sample, 2999 (68.0%) were women, and 1409 (32.0%) were men, with an average age of 58.18 (SD 12.94) years. The outcome class distribution was imbalanced: 85.0% non-MAFLD (3747/4408), 11.5% nondiabetic MAFLD (507/4408), and 3.5% diabetic MAFLD (154/4408). We used class weights during model training to address the imbalance issue. Prior to making phenotype predictions, we applied <italic>z</italic> score standardization to numeric variables and one-hot encoding to categorical variables to prepare the data.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study was approved by the Chang Gung Medical Foundation Institutional Review Board (201800270B0). All procedures were performed in accordance with relevant guidelines and regulations. 
Written informed consent was obtained from all participants. All patient information was anonymized prior to analysis, and the study complied with ethical standards for research involving deidentified healthcare data. Participants were informed that their involvement was voluntary and that they could withdraw from the study at any time without penalty. No financial compensation was provided.</p></sec><sec id="s2-3"><title>Proposed Method</title><sec id="s2-3-1"><title>Problem Definition</title><p>Let <inline-formula><mml:math id="ieqn1"><mml:mi>D</mml:mi></mml:math></inline-formula> be individual demographics, <inline-formula><mml:math id="ieqn2"><mml:mi>C</mml:mi></mml:math></inline-formula> represent clinical variables, <inline-formula><mml:math id="ieqn3"><mml:mi>S</mml:mi></mml:math></inline-formula> denote genetic family history&#x2013;related and lifestyle data, and <inline-formula><mml:math id="ieqn4"><mml:mi>Y</mml:mi></mml:math></inline-formula> indicate distinct MAFLD outcomes. Phenotype prediction represents a multiclass classification task: given <inline-formula><mml:math id="ieqn5"><mml:mi>D</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn6"><mml:mi>C</mml:mi></mml:math></inline-formula>, and <inline-formula><mml:math id="ieqn7"><mml:mi>S</mml:mi></mml:math></inline-formula>, the objective is to effectively process <inline-formula><mml:math id="ieqn8"><mml:mi>S</mml:mi></mml:math></inline-formula> based on the observed values, then integrate with <inline-formula><mml:math id="ieqn9"><mml:mi>D</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn10"><mml:mi>C</mml:mi></mml:math></inline-formula> to predict whether an individual is likely to develop a specific MAFLD phenotype within a 1-year timeframe. 
By effectively processing <inline-formula><mml:math id="ieqn11"><mml:mi>S</mml:mi></mml:math></inline-formula>, it is possible to extract useful information from <inline-formula><mml:math id="ieqn12"><mml:mi>S</mml:mi></mml:math></inline-formula>, to better cope with the missingness that often arises among self-reported genetic family history data and lifestyle data for improved predictive efficacy. We considered 3 outcome classes for the multiclass classification task, <inline-formula><mml:math id="ieqn13"><mml:mi>Y</mml:mi><mml:mo>=</mml:mo><mml:mfenced open="{" close="}" separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:mfenced></mml:math></inline-formula>, which correspond to the non-MAFLD, nondiabetic MAFLD, and diabetic MAFLD phenotypes, respectively. The combination of obese MAFLD and lean MAFLD phenotypes into a single outcome class (nondiabetic MAFLD) is justified because both phenotypes rely solely on BMI. 
It also simplifies the outcome class classification and allows for meaningful, accurate predictions, in that physicians can readily separate obese and lean MAFLD according to BMI values, which offers clinical practicality [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref72">72</xref>] and facilitates predictions [<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref74">74</xref>].</p></sec><sec id="s2-3-2"><title>Architectural Framework</title><p><xref ref-type="fig" rid="figure1">Figure 1</xref> depicts the proposed method&#x2019;s architectural framework and highlights its 3 important components: graph representation learning, multiview contrastive pretraining, and 2-stage risk estimation. With graph representation learning, the method uses sparse, incomplete survey data to build 2 individual-feature bipartite networks, a person-lifestyle graph and a person-genetics graph, which are used to learn graph representations. The multiview contrastive pretraining component then uses the individual graph representations as inputs to capture intraphenotype variability and create lifestyle and genetics embeddings. Finally, these embeddings are combined with demographic and clinical data in the 2-stage risk estimation process to predict the likelihood of each outcome class for an individual.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Architectural framework of the proposed method. MAFLD: metabolic dysfunction&#x2013;associated fatty liver disease; MC: multiview contrastive.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig01.png"/></fig></sec><sec id="s2-3-3"><title>Graph Representation Learning</title><p>We used lifestyle and genetic family history data to perform the novel graph representation learning and construct both person-lifestyle and person-genetics networks. 
The former captures relationships among individuals according to their lifestyle predispositions (eg, shared dietary habits and physical activities). The latter leverages genetic family history&#x2013;related variables (eg, shared alleles and single nucleotide polymorphisms) that can influence individuals&#x2019; biological or genetic predispositions. These 2 networks were constructed separately to enable the graph representation learning component to concentrate on unique structures and relationships intrinsic to each type of data, thereby capturing the interplay of lifestyle and family genetic variables.</p><p><xref ref-type="fig" rid="figure2">Figure 2</xref> illustrates the construction of 2 bipartite networks. For the person-lifestyle bipartite network, <inline-formula><mml:math id="ieqn14"><mml:msup><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:msubsup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msup><mml:mo>}</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math 
id="ieqn15"><mml:msubsup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msub><mml:mo>}</mml:mo></mml:math></inline-formula> refers to a set of individuals, <inline-formula><mml:math id="ieqn16"><mml:msubsup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>11</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>12</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>M</mml:mi><mml:mi>J</mml:mi></mml:mrow></mml:msub><mml:mo>}</mml:mo></mml:math></inline-formula> represents lifestyle features, and <inline-formula><mml:math id="ieqn17"><mml:msup><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> denotes an edge set that links <inline-formula><mml:math 
id="ieqn18"><mml:msubsup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn19"><mml:msubsup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>. <inline-formula><mml:math id="ieqn20"><mml:mi>N</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn21"><mml:mi>M</mml:mi></mml:math></inline-formula> denote the total numbers of individuals and lifestyle features, respectively. For each lifestyle feature, multiple nodes are used to indicate its plausible (coded) values. <inline-formula><mml:math id="ieqn22"><mml:mi>J</mml:mi></mml:math></inline-formula> denotes the number of distinct values or categories of <inline-formula><mml:math id="ieqn23"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>; thus, <inline-formula><mml:math id="ieqn24"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the <inline-formula><mml:math id="ieqn25"><mml:mi>j</mml:mi></mml:math></inline-formula>th category of feature <inline-formula><mml:math id="ieqn26"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. 
If person <inline-formula><mml:math id="ieqn27"><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>u</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> has a value on lifestyle feature <inline-formula><mml:math id="ieqn28"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> of the <inline-formula><mml:math id="ieqn29"><mml:mi>j</mml:mi></mml:math></inline-formula>th category, there exists an undirected link <inline-formula><mml:math id="ieqn30"><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>u</mml:mi><mml:mo>,</mml:mo><mml:mi>v</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> between nodes <inline-formula><mml:math id="ieqn31"><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>u</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn32"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, and the edge weight reflects <inline-formula><mml:math id="ieqn33"><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>u</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>&#x2019;s value on feature <inline-formula><mml:math id="ieqn34"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Graph representation learning component of the proposed method. NA: not applicable.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig02.png"/></fig><p>For the person-lifestyle bipartite network, we used GraphSAGE [<xref ref-type="bibr" rid="ref58">58</xref>] to learn representations for the nodes and edges. 
We relied on triplet loss to train the graph representation model, which involved an anchor node, a positive sample (neighboring nodes or the node itself if no neighbors existed), and a negative sample:</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mi>max</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>a</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>p</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>d</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>a</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>n</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:mi>&#x03B1;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn35"><mml:mi>a</mml:mi></mml:math></inline-formula> is the anchor node, <inline-formula><mml:math id="ieqn36"><mml:mi>p</mml:mi></mml:math></inline-formula> is the positive sample, <inline-formula><mml:math id="ieqn37"><mml:mi>n</mml:mi></mml:math></inline-formula> is the negative sample, <inline-formula><mml:math id="ieqn38"><mml:mi>d</mml:mi><mml:mo>(</mml:mo><mml:mo>&#x2219;</mml:mo><mml:mo>)</mml:mo></mml:math></inline-formula> is the distance function, <inline-formula><mml:math id="ieqn39"><mml:mi>f</mml:mi><mml:mo>(</mml:mo><mml:mo>&#x2219;</mml:mo><mml:mo>)</mml:mo></mml:math></inline-formula> is the embedding function, and <inline-formula><mml:math id="ieqn40"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula> is a margin 
parameter.  <inline-formula><mml:math id="ieqn41"><mml:msubsup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> represents the learned node embedding for each person <inline-formula><mml:math id="ieqn42"><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. Similarly, we built the person-genetics bipartite network, <inline-formula><mml:math id="ieqn43"><mml:msup><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:msubsup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msup><mml:mo>}</mml:mo></mml:math></inline-formula>, to learn the genetic representation <inline-formula><mml:math id="ieqn44"><mml:msubsup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>. 
The representations learned from these 2 networks provided the input for the contrastive pretraining component.</p></sec><sec id="s2-3-4"><title>Multiview Contrastive Pretraining</title><p>Originally developed for computer vision tasks, contrastive learning leverages data augmentation and complementary views for effective representation learning [<xref ref-type="bibr" rid="ref66">66</xref>]. Conventional, supervised learning faces multifaceted challenges, especially when dealing with high intraclass variance and imbalanced outcome class distribution. Contrastive learning offers a viable solution by learning data representations through instance discrimination. The core idea is intuitive: instead of solely relying on labeled examples, contrastive learning learns to distinguish among different patients while ensuring that similar patients have similar representations in the learned feature space. This self-supervised approach can learn robust features, particularly in scenarios involving limited or imbalanced labeled data. However, existing contrastive learning methods, such as MoCo [<xref ref-type="bibr" rid="ref63">63</xref>] and SimCLR [<xref ref-type="bibr" rid="ref65">65</xref>], rely heavily on data augmentation techniques such as cropping and rotation in images, which are not directly applicable to structured patient data.</p><p>We designed a novel multiview contrastive pretraining component that leverages multiple context-specific representations to capture intraphenotype variability. In the proposed method, multiview contrastive learning examines patients&#x2019; clinical profiles from multiple perspectives and learns discriminative representations that better predict infrequent but important MAFLD subtypes while maintaining performance across different categories. 
For this task, an intuitive learning objective can be defined by the cosine similarity among individuals, according to the person-lifestyle representation <inline-formula><mml:math id="ieqn45"><mml:msubsup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>, person-genetic representation <inline-formula><mml:math id="ieqn46"><mml:msubsup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>, and clinical data <inline-formula><mml:math id="ieqn47"><mml:mi>C</mml:mi></mml:math></inline-formula>. The intent is to capture intraphenotype variability. We applied guided, collaborative training to steer the training process, for which we used clinical variables for the teacher view and survey-based, context-specific representations (<inline-formula><mml:math id="ieqn48"><mml:msubsup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn49"><mml:msubsup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>) for the learner views. 
The resulting model can integrate and align critical information from clinical and survey-based data.</p><p><xref ref-type="fig" rid="figure3">Figure 3</xref> depicts the contrastive pretraining component, in which 3 encoders (<inline-formula><mml:math id="ieqn50"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn51"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, and <inline-formula><mml:math id="ieqn52"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>) process the representations of lifestyle data, clinical data, and genetic family history data, respectively. Thus, <inline-formula><mml:math id="ieqn53"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is pretrained with an autoencoder to produce the teacher view that anchors the learning process. As learner views, <inline-formula><mml:math id="ieqn54"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn55"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are trained according to <inline-formula><mml:math id="ieqn56"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> during the contrastive learning process. 
Both <inline-formula><mml:math id="ieqn57"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn58"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> adopt the same 3-layer MLP with nonlinear activation functions. The outputs of <inline-formula><mml:math id="ieqn59"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn60"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, and <inline-formula><mml:math id="ieqn61"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are represented by <inline-formula><mml:math id="ieqn62"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn63"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, and <inline-formula><mml:math id="ieqn64"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, which denote the embeddings of lifestyle, clinical, and genetic family history data, respectively. 
For a person <inline-formula><mml:math id="ieqn65"><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, the objective is to align the cosine similarity of the embeddings of positive pairs {<inline-formula><mml:math id="ieqn66"><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>} and {<inline-formula><mml:math id="ieqn67"><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>}, according to the <inline-formula><mml:math id="ieqn68"><mml:mtext>InfoNCE loss</mml:mtext></mml:math></inline-formula>:</p><disp-formula id="E2"> <label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo 
stretchy="false">(</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>a</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>b</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mfrac><mml:mrow><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mtext>sim</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>a</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>b</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mtext>sim</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>a</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>b</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn69"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x22C5;</mml:mo><mml:mo>,</mml:mo><mml:mo>&#x22C5;</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the similarity function, and <inline-formula><mml:math id="ieqn70"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the temperature parameter.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Multiview contrastive learning component of the proposed method.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig03.png"/></fig><p>In contrastive learning, fixed temperature settings are generally ineffective for heterogeneous data distributions [<xref ref-type="bibr" rid="ref75">75</xref>]. Therefore, we designed an adaptive temperature network (ATN) to adjust the temperature, <inline-formula><mml:math id="ieqn71"><mml:msub><mml:mrow><mml:mi>&#x03C4;</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, dynamically. 
As a lightweight NN, the ATN uses batch-level aggregated statistics as input and generates a single temperature value:</p><disp-formula id="E3"> <label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>V</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>z</mml:mi><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>and</p><disp-formula id="E4"><label>(4)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>W</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>ReLU</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>V</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn72"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>n</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the batch size; <inline-formula><mml:math 
id="ieqn73"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>V</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the aggregated feature representation, calculated as the batch average of clinical representations <inline-formula><mml:math id="ieqn74"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mo fence="false" stretchy="false">{</mml:mo><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mi>b</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo fence="false" stretchy="false">}</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>; and <inline-formula><mml:math id="ieqn75"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the temperature value for each data batch.</p><p>Both <inline-formula><mml:math id="ieqn76"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn77"><mml:msub><mml:mrow><mml:mi>E</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are trained with a cross-entropy loss:</p><disp-formula id="E5"><label>(5)</label><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable columnalign="left left" rowspacing="4pt" 
columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn78"><mml:mstyle><mml:mrow><mml:mstyle 
displaystyle="false"><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn79"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> reflect the contrastive losses between <inline-formula><mml:math id="ieqn80"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn81"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and between <inline-formula><mml:math id="ieqn82"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn83"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, respectively. 
Multiview contrastive learning ensures that the learned lifestyle and family genetics embeddings (learner view) align with the clinical embeddings (teacher view), which enhances representation quality.</p></sec><sec id="s2-3-5"><title>Two-Stage Risk Estimation</title><p>Finally, the 2-stage deep NN component for MAFLD phenotype prediction targets important interphenotype relationships. As depicted in <xref ref-type="fig" rid="figure4">Figure 4</xref>, this component estimates whether a person is likely to develop MAFLD (<inline-formula><mml:math id="ieqn84"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mover><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), such that <inline-formula><mml:math id="ieqn85"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mover><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> if 
there is an indication of any MAFLD phenotype and  <inline-formula><mml:math id="ieqn86"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mover><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> otherwise. In the former case, the component then estimates the likelihood of a specific phenotype and produces the probability distribution <inline-formula><mml:math id="ieqn87"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mover><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>H</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, corresponding to distinct phenotypes, where <inline-formula><mml:math id="ieqn88"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>H</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the total number of phenotypes. 
This hierarchical estimation design enables the proposed method to capture general characteristics of MAFLD and distinct phenotypes for predictions. The overall probability distribution <inline-formula><mml:math id="ieqn89"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> can be calculated as follows:</p><disp-formula id="E6"> <label>(6)</label><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo maxsize="1.623em" minsize="1.623em">[</mml:mo></mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x22C5;</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x22C5;</mml:mo><mml:msubsu
p><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x22C5;</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>H</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x22C5;</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>Y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mo maxsize="1.623em" minsize="1.623em">]</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Two-stage phenotype prediction component of the proposed method, where Y<sub>a</sub> represents the first-stage binary prediction, indicating the presence (Y<sub>a</sub>=1) or absence (Y<sub>a</sub>=0) of any MAFLD phenotype. Y<sub>b</sub> represents the second-stage probability distribution over the H specific phenotypes, which is subsequently estimated if Y<sub>a</sub>=1. 
Y<sub>b1</sub>, Y<sub>bn</sub>, and Y<sub>bH</sub> denote the estimated probabilities for the first, nth, and Hth (final) specific MAFLD phenotypes, respectively. MAFLD: metabolic dysfunction&#x2013;associated fatty liver disease.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig04.png"/></fig><p>In the 2-stage estimation process, we also designed a loss function to train the proposed method:</p><disp-formula id="E7"><label>(7)</label><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable columnalign="" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>total</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>y</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>n</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mi>log</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>n</mml:mi><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>&#x03B3;</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mi>log</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>+</mml:mo><mml:mi>&#x03BB;</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>m</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mi>log</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mi>m</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The first term of <inline-formula><mml:math id="ieqn90"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the negative log-likelihood loss, calculated according to the actual and predicted MAFLD phenotype. The second and third terms denote the losses in the first and second stages, respectively, and <inline-formula><mml:math id="ieqn91"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B3;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn92"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03BB;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are hyperparameters that control the trade-offs among these 3 terms. 
Specifically, <inline-formula><mml:math id="ieqn93"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>n</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> indicates the overall predicted probability of the <inline-formula><mml:math id="ieqn94"><mml:mi>n</mml:mi></mml:math></inline-formula>th class for person <inline-formula><mml:math id="ieqn95"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn96"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the estimated probability of MAFLD (binary, k=2), and <inline-formula><mml:math id="ieqn97"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>m</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> denotes the estimated probability of the <inline-formula><mml:math id="ieqn98"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>m</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>th phenotype for individuals 
predicted to have MAFLD in stage 2. With <inline-formula><mml:math id="ieqn99"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, our method learns interphenotype relationships for phenotype prediction.</p></sec></sec><sec id="s2-4"><title>Evaluations</title><p>Eight prevalent methods were included as benchmarks: DT [<xref ref-type="bibr" rid="ref44">44</xref>], RF [<xref ref-type="bibr" rid="ref45">45</xref>], XGBoost [<xref ref-type="bibr" rid="ref46">46</xref>], MLP [<xref ref-type="bibr" rid="ref48">48</xref>], autoencoder [<xref ref-type="bibr" rid="ref49">49</xref>], GAT [<xref ref-type="bibr" rid="ref57">57</xref>], GCN [<xref ref-type="bibr" rid="ref56">56</xref>], and GraphSAGE [<xref ref-type="bibr" rid="ref58">58</xref>]. These methods represent different analytics approaches and are frequently used for clinical prediction tasks; therefore, they are suitable for performance comparisons. Many of these benchmark methods are not designed to deal with incomplete data. Because the sample had missing values, we applied k-nearest neighbor (k=5) imputation [<xref ref-type="bibr" rid="ref76">76</xref>-<xref ref-type="bibr" rid="ref78">78</xref>] to the dataset and used one-hot encoding for categorical variables during data preprocessing to ensure consistency and comparability in the evaluations, that is, all methods used the same preprocessed data for fair comparisons. The only difference was that the proposed method also used the raw, nonimputed survey data (genetic family history and lifestyle data) as input for graph representation learning and contrastive learning, which are components capable of handling missing values. 
Moreover, we conducted an ablation study to examine the relative contribution of each key component to the proposed method&#x2019;s overall performance.</p><p>To examine the prediction performance of each method, we randomly split the sample 10 times, using different random seeds to ensure robustness. In each trial, we used 80% of the data for model training and the remaining 20% for testing [<xref ref-type="bibr" rid="ref76">76</xref>]. We also conducted 5-fold cross-validation on the training data prior to the evaluations and performed a series of analyses to fine-tune the key parameters of each method. <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> summarizes important parameter values of the respective methods. Performance assessments relied on precision, recall, <italic>F</italic><sub>1</sub>-score, and AUC values. We did not consider accuracy, as it could not reflect prediction performance due to the imbalanced distribution of the outcome classes [<xref ref-type="bibr" rid="ref79">79</xref>]. Compared with precision or recall, the <italic>F</italic><sub>1</sub>-score and AUC are arguably better indicators of a method&#x2019;s efficacy in predicting MAFLD phenotypes. Following the approach of Docherty et al [<xref ref-type="bibr" rid="ref80">80</xref>], we adopted a one-versus-rest strategy to assess each outcome class and compared the respective AUC values of all methods, which supports a fair, holistic analysis of their ability to predict MAFLD phenotypes.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overall Prediction Performance</title><p><xref ref-type="table" rid="table2">Table 2</xref> presents each method&#x2019;s prediction performance across 10 trials. The proposed method has a 2-stage estimation design&#x2014;stage 1 estimates whether an individual will develop MAFLD, and stage 2 predicts the likelihood of each MAFLD phenotype. 
Therefore, we report the results for each stage separately. As <xref ref-type="table" rid="table2">Table 2</xref> shows, the proposed method attained higher AUC values in both stages, indicating its ability to distinguish patients with different outcomes. In stage 1, it accurately identified adults likely to develop MAFLD, with few false alarms, as signified by the relatively high precision and recall values. In stage 2, the proposed method generated effective predictions by consolidating the stage 1 results. The multiclass prediction results in stage 2 also allowed for direct comparisons with the benchmark methods. As seen in <xref ref-type="table" rid="table2">Table 2</xref>, the proposed method outperformed all benchmarks on both <italic>F</italic><sub>1</sub>-score and AUC. It exhibited a 7.2% improvement in AUC over the best-performing benchmark (0.898 vs 0.838) and had a 16.6% higher <italic>F</italic><sub>1</sub>-score than the best-performing benchmark (0.652 vs 0.559). Paired two-tailed <italic>t</italic> tests performed to examine differences in AUC indicated that the observed improvements were statistically significant (<italic>P</italic>&#x003C;.001).</p><p><xref ref-type="fig" rid="figure5">Figure 5</xref> presents the respective receiver operating characteristic curves of all methods. The proposed method&#x2019;s receiver operating characteristic curve was notably better than that of any benchmark method. 
This result further affirms its superior efficacy in estimating MAFLD phenotypes among adults compared with many prevalent methods.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Overall performance of each investigated method.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Method</td><td align="left" valign="bottom" colspan="4">Performance metric, mean (SE)</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">DT<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top">0.549 (0.012)</td><td align="left" valign="top">0.468 (0.007)</td><td align="left" valign="top">0.493 (0.007)</td><td align="left" valign="top">0.765 (0.007)</td></tr><tr><td align="left" valign="top">RF<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">0.576 (0.021)</td><td align="left" valign="top">0.542 (0.019)</td><td align="left" valign="top">0.541 (0.016)</td><td align="left" valign="top">0.819 (0.007)</td></tr><tr><td align="left" valign="top">XGBoost<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.598 (0.019)</td><td align="left" valign="top">0.490 (0.015)</td><td align="left" valign="top">0.525 (0.019)</td><td align="left" valign="top">0.812 (0.019)</td></tr><tr><td align="left" valign="top">MLP<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.567 (0.008)</td><td align="left" valign="top">0.570 (0.019)</td><td align="left" valign="top">0.557 (0.008)</td><td align="left" valign="top">0.831 (0.002)</td></tr><tr><td 
align="left" valign="top">Autoencoder</td><td align="left" valign="top">0.537 (0.011)</td><td align="left" valign="top">0.566 (0.023)</td><td align="left" valign="top">0.528 (0.010)</td><td align="left" valign="top">0.832 (0.003)</td></tr><tr><td align="left" valign="top">GAT<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="top">0.528 (0.014)</td><td align="left" valign="top">0.542 (0.022)</td><td align="left" valign="top">0.512 (0.010)</td><td align="left" valign="top">0.823 (0.004)</td></tr><tr><td align="left" valign="top">GCN<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup></td><td align="left" valign="top">0.505 (0.011)</td><td align="left" valign="top">0.554 (0.012)</td><td align="left" valign="top">0.512 (0.014)</td><td align="left" valign="top">0.824 (0.005)</td></tr><tr><td align="left" valign="top">GraphSAGE</td><td align="left" valign="top">0.540 (0.010)</td><td align="left" valign="top">0.598 (0.011)</td><td align="left" valign="top">0.559 (0.009)</td><td align="left" valign="top">0.838 (0.004)</td></tr><tr><td align="left" valign="top">Proposed method (stage 1)</td><td align="left" valign="top">0.713 (0.016)</td><td align="left" valign="top">0.745 (0.008)</td><td align="left" valign="top">0.726 (0.011)</td><td align="left" valign="top">0.859 (0.004)</td></tr><tr><td align="left" valign="top">Proposed method (stage 2)</td><td align="left" valign="top">0.644 (0.022)</td><td align="left" valign="top">0.678 (0.027)</td><td align="left" valign="top">0.652 (0.013)</td><td align="left" valign="top">0.898 (0.003)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn><fn id="table2fn2"><p><sup>b</sup>DT: decision tree.</p></fn><fn id="table2fn3"><p><sup>c</sup>RF: random forest.</p></fn><fn id="table2fn4"><p><sup>d</sup>XGBoost: extreme gradient boosting.</p></fn><fn id="table2fn5"><p><sup>e</sup>MLP: multilayer perceptron.</p></fn><fn 
id="table2fn6"><p><sup>f</sup>GAT: graph attention network.</p></fn><fn id="table2fn7"><p><sup>g</sup>GCN: graph convolutional network.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Area under the curve (AUC) values for the investigated methods. GAT: graph attention network; GCN: graph convolutional network; MLP: multilayer perceptron; ROC: receiver operating characteristic.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig05.png"/></fig></sec><sec id="s3-2"><title>Prediction Performance for Each Outcome Class</title><p>In addition to overall performance, we examined the respective methods&#x2019; performance for each outcome class. As shown in <xref ref-type="table" rid="table3">Table 3</xref>, the proposed method achieved the highest <italic>F</italic><sub>1</sub>-score and AUC values for each outcome class, reaffirming its superior prediction ability. It attained a higher <italic>F</italic><sub>1</sub>-score (0.913) and AUC (0.859) for non-MAFLD than the respective best-performing benchmarks (DT: <italic>F</italic><sub>1</sub>-score=0.908; GraphSAGE: AUC=0.801). The performance improvements were especially prominent for the MAFLD phenotypes. For nondiabetic MAFLD, our method achieved an <italic>F</italic><sub>1</sub>-score of 0.531, much higher than that of the best-performing benchmark (MLP: <italic>F</italic><sub>1</sub>-score=0.400), exhibiting a 32.8% improvement. It also attained the highest AUC (0.878), higher than that of the best-performing benchmark (GraphSAGE: AUC=0.804). For diabetic MAFLD, the proposed method&#x2019;s <italic>F</italic><sub>1</sub>-score (0.519) was 30.4% higher than that of the best-performing benchmark (GraphSAGE: <italic>F</italic><sub>1</sub>-score=0.398). 
Moreover, its precision value was superior to that of other methods, suggesting that it can identify adults who are likely to develop diabetic MAFLD with fewer false alarms.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Prediction performance of each method for 3 outcome classes.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Outcome class and method</td><td align="left" valign="bottom" colspan="4">Performance metric, mean (SE)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Precision</td><td align="left" valign="top">Recall</td><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">AUC</td></tr></thead><tbody><tr><td align="left" valign="top">Non-MAFLD<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Decision tree</td><td align="left" valign="top">0.879 (0.002)</td><td align="left" valign="top">0.941 (0.004)</td><td align="left" valign="top">0.908 (0.002)</td><td align="left" valign="top">0.746 (0.007)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random forest</td><td align="left" valign="top">0.892 (0.004)</td><td align="left" valign="top">0.938 (0.003)</td><td align="left" valign="top">0.901 (0.004)</td><td align="left" valign="top">0.781 (0.006)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">0.881 (0.002)</td><td align="left" valign="top">0.954 (0.003)</td><td align="left" 
valign="top">0.895 (0.004)</td><td align="left" valign="top">0.798 (0.006)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">0.899 (0.004)</td><td align="left" valign="top">0.898 (0.010)</td><td align="left" valign="top">0.897 (0.003)</td><td align="left" valign="top">0.788 (0.005)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Autoencoder</td><td align="left" valign="top">0.905 (0.004)</td><td align="left" valign="top">0.878 (0.011)</td><td align="left" valign="top">0.892 (0.004)</td><td align="left" valign="top">0.800 (0.005)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GAT<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">0.899 (0.006)</td><td align="left" valign="top">0.845 (0.023)</td><td align="left" valign="top">0.870 (0.012)</td><td align="left" valign="top">0.777 (0.008)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GCN<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="top">0.913 (0.004)</td><td align="left" valign="top">0.825 (0.032)</td><td align="left" valign="top">0.861 (0.018)</td><td align="left" valign="top">0.799 (0.005)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GraphSAGE</td><td align="left" valign="top">0.907 (0.003)</td><td align="left" valign="top">0.875 (0.011)</td><td align="left" valign="top">0.890 (0.005)</td><td align="left" valign="top">0.801 (0.004)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Proposed method</td><td align="left" valign="top">0.925 (0.005)</td><td align="left" valign="top">0.899 (0.017)</td><td align="left" valign="top">0.913 (0.008)</td><td align="left" valign="top">0.859 (0.011)</td></tr><tr><td align="left" valign="top">Nondiabetic MAFLD</td><td align="left" valign="top" colspan="4"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Decision tree</td><td align="left" valign="top">0.436 (0.016)</td><td align="left" valign="top">0.253 (0.021)</td><td align="left" valign="top">0.316 (0.019)</td><td align="left" valign="top">0.781 (0.010)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random forest</td><td align="left" valign="top">0.444 (0.021)</td><td align="left" valign="top">0.334 (0.031)</td><td align="left" valign="top">0.359 (0.028)</td><td align="left" valign="top">0.787 (0.009)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top">0.495 (0.020)</td><td align="left" valign="top">0.251 (0.016)</td><td align="left" valign="top">0.329 (0.015)</td><td align="left" valign="top">0.803 (0.005)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP</td><td align="left" valign="top">0.423 (0.016)</td><td align="left" valign="top">0.392 (0.026)</td><td align="left" valign="top">0.400 (0.017)</td><td align="left" valign="top">0.800 (0.006)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Autoencoder</td><td align="left" valign="top">0.347 (0.026)</td><td align="left" valign="top">0.344 (0.020)</td><td align="left" valign="top">0.337 (0.014)</td><td 
align="left" valign="top">0.777 (0.008)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GAT</td><td align="left" valign="top">0.301 (0.022)</td><td align="left" valign="top">0.387 (0.045)</td><td align="left" valign="top">0.323 (0.014)</td><td align="left" valign="top">0.777 (0.007)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GCN</td><td align="left" valign="top">0.280 (0.014)</td><td align="left" valign="top">0.421 (0.055)</td><td align="left" valign="top">0.317 (0.016)</td><td align="left" valign="top">0.765 (0.007)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GraphSAGE</td><td align="left" valign="top">0.384 (0.018)</td><td align="left" valign="top">0.405 (0.023)</td><td align="left" valign="top">0.388 (0.014)</td><td align="left" valign="top">0.804 (0.008)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Proposed method</td><td align="left" valign="top">0.506 (0.016)</td><td align="left" valign="top">0.563 (0.021)</td><td align="left" valign="top">0.531 (0.019)</td><td align="left" valign="top">0.878 (0.003)</td></tr><tr><td align="left" valign="top">Diabetic MAFLD</td><td align="left" valign="top" colspan="4"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Decision tree</td><td align="left" valign="top">0.331 (0.022)</td><td align="left" valign="top">0.210 (0.013)</td><td align="left" valign="top">0.255 (0.015)</td><td align="left" valign="top">0.769 (0.018)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random forest</td><td align="left" valign="top">0.392 (0.023)</td><td align="left" 
valign="top">0.381 (0.035)</td><td align="left" valign="top">0.363 (0.024)</td><td align="left" valign="top">0.891 (0.012)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top">0.450 (0.027)</td><td align="left" valign="top">0.255 (0.010)</td><td align="left" valign="top">0.323 (0.012)</td><td align="left" valign="top">0.848 (0.018)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP</td><td align="left" valign="top">0.376 (0.020)</td><td align="left" valign="top">0.421 (0.053)</td><td align="left" valign="top">0.371 (0.020)</td><td align="left" valign="top">0.905 (0.008)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Autoencoder</td><td align="left" valign="top">0.358 (0.045)</td><td align="left" valign="top">0.480 (0.071)</td><td align="left" valign="top">0.354 (0.023)</td><td align="left" valign="top">0.920 (0.003)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GAT</td><td align="left" valign="top">0.378 (0.043)</td><td align="left" valign="top">0.395 (0.061)</td><td align="left" valign="top">0.344 (0.022)</td><td align="left" valign="top">0.915 (0.006)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GCN</td><td align="left" valign="top">0.322 (0.025)</td><td align="left" valign="top">0.417 (0.046)</td><td align="left" valign="top">0.353 (0.025)</td><td align="left" valign="top">0.907 (0.007)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GraphSAGE</td><td align="left" valign="top">0.330 (0.024)</td><td align="left" valign="top">0.519 (0.033)</td><td 
align="left" valign="top">0.398 (0.023)</td><td align="left" valign="top">0.917 (0.005)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Proposed method</td><td align="left" valign="top">0.500 (0.016)</td><td align="left" valign="top">0.570 (0.042)</td><td align="left" valign="top">0.519 (0.019)</td><td align="left" valign="top">0.957 (0.009)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>MAFLD: metabolic dysfunction&#x2013;associated fatty liver disease.</p></fn><fn id="table3fn2"><p><sup>b</sup>XGBoost: extreme gradient boosting.</p></fn><fn id="table3fn3"><p><sup>c</sup>MLP: multilayer perceptron.</p></fn><fn id="table3fn4"><p><sup>d</sup>GAT: graph attention network.</p></fn><fn id="table3fn5"><p><sup>e</sup>GCN: graph convolutional network.</p></fn></table-wrap-foot></table-wrap><p>The box plots in <xref ref-type="fig" rid="figure6">Figure 6</xref> indicate the proposed method&#x2019;s robust performance for each outcome class across 10 trials. It attained high <italic>F</italic><sub>1</sub>-scores for each outcome class, especially nondiabetic MAFLD and diabetic MAFLD, while the benchmark methods exhibited notably greater variance and occasional outliers. Together, these plots provide further evidence of the proposed method&#x2019;s efficacy and value for clinical decision-making and patient management.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Box plots showing <italic>F</italic><sub>1</sub>-scores (median and IQR) of each method for different outcome classes. 
AE: autoencoder; DT: decision tree; GAT: graph attention network; GCN: graph convolutional network; MAFLD: metabolic dysfunction&#x2013;associated fatty liver disease; MLP: multilayer perceptron; RF: random forest; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig06.png"/></fig></sec><sec id="s3-3"><title>Ablation Study</title><p>We also performed an ablation study to examine the relative contribution of each key component of the proposed method. We considered MLP, Graph, Graph + contrastive learning, and the (complete) proposed method. In essence, MLP serves as a baseline because it only uses the preprocessed data, without any key components of the proposed method. Graph builds on MLP and includes the graph representation learning of genetic family history and lifestyle data, together with the learned embeddings concatenated to the preprocessed dataset to train the MLP classifier. Graph + contrastive learning further extends Graph by incorporating contrastive learning after graph representation learning. The complete proposed method included all 3 key components. The results of the ablation study (<xref ref-type="table" rid="table4">Table 4</xref>) revealed how each component contributed to the method&#x2019;s performance. 
They jointly produced the best predictions, indicating that MAFLD phenotype prediction can benefit from graph representation, multiview contrastive pretraining, and 2-stage estimation design.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Results of the ablation study.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">MLP<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="top">0.831 (0.002)</td></tr><tr><td align="left" valign="top">Graph</td><td align="left" valign="top">0.847 (0.004)</td></tr><tr><td align="left" valign="top">Graph + CL<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="top">0.881 (0.001)</td></tr><tr><td align="left" valign="top">Complete proposed method</td><td align="left" valign="top">0.898 (0.003)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn><fn id="table4fn2"><p><sup>b</sup>MLP: multilayer perceptron.</p></fn><fn id="table4fn3"><p><sup>c</sup>CL: contrastive learning.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-4"><title>Interpretability Analysis</title><p>To gain clinical insights into the proposed method&#x2019;s learned representations, we examined its interpretability by depicting the embeddings visually. Specifically, we applied t-distributed stochastic neighbor embedding (t-SNE) [<xref ref-type="bibr" rid="ref81">81</xref>] to visualize the contrastive pretraining embeddings and performed a Shapley additive explanation (SHAP) analysis [<xref ref-type="bibr" rid="ref82">82</xref>] to reveal feature importance. 
<xref ref-type="fig" rid="figure7">Figure 7A</xref> presents a visualization of the original lifestyle and genetic features, and <xref ref-type="fig" rid="figure7">Figure 7B</xref> provides a visualization of the features obtained by concatenating the contrastive pretraining embeddings with the original lifestyle and genetic features. The original lifestyle and genetic features exhibited a scattered distribution, without any clear patterns. With contrastive pretraining embeddings, more distinctive clusters emerged, suggesting that patients with similar characteristics tend to cluster more closely than those with dissimilar characteristics. While these visual plots are exploratory without formal proof of class separability, they still illustrate that incorporating contrastive pretraining embeddings can potentially create a more structured, distinguishable representation of patient outcomes for effective MAFLD phenotype prediction.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>T-distributed stochastic neighbor embedding (t-SNE) visualization of (A) original lifestyle and genetic (life/gene) features and (B) contrastive pretraining embeddings with lifestyle and genetic features. MAFLD: metabolic dysfunction&#x2013;associated fatty liver disease.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig07.png"/></fig><p>We further examined the feature importance for each outcome class, as depicted by the SHAP summary plots in <xref ref-type="fig" rid="figure8">Figure 8</xref>. Because the proposed method adopted a 2-stage estimation (architecture) design, the model-agnostic explainer KernelSHAP was used with a background dataset of 100 training instances. For all test instances, SHAP values were computed on a representative model instance (ie, median test AUC across 10 trials). 
As seen in <xref ref-type="fig" rid="figure8">Figure 8</xref>, several metabolic indicators were important predictors consistently across different outcome classes. For example, BMI and waist circumference were highly influential. As <xref ref-type="fig" rid="figure8">Figure 8A</xref> shows, high BMI values (marked as red points) greatly reduced the likelihood of non-MAFLD predictions; particularly, high BMI and waist circumference values were associated with a greater likelihood of nondiabetic MAFLD or diabetic MAFLD, as shown in <xref ref-type="fig" rid="figure8">Figure 8B and C</xref>. Predictions of nondiabetic MAFLD were influenced by a combination of general metabolic indicators (eg, BMI and waist circumference) and lifestyle factors (eg, smoking and sleep disturbance). For diabetic MAFLD, definitive disease markers and factors related to disease consequence and management, such as self-care status and nutritional status (Mini Nutritional Assessment), appeared to be essential auxiliary predictors. These results align with clinical knowledge and reveal the proposed method&#x2019;s ability to capture phenotype-specific patterns from patient data, with desirable interpretability.</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Summary plots of Shapley additive explanation (SHAP) values for (A) non&#x2013;metabolic dysfunction&#x2013;associated fatty liver disease (non-MAFLD), (B) nondiabetic MAFLD, and (C) diabetic MAFLD. ALT/GPT: alanine aminotransferase/glutamic-pyruvic transaminase; AST/GOT: aspartate aminotransferase/glutamic-oxaloacetic transaminase; CVD: cardiovascular disease; GGT: gamma-glutamyl transferase; HbA<sub>1c</sub>: hemoglobin A<sub>1c</sub>; MNA: Mini Nutritional Assessment.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig08.png"/></fig><p>Additionally, SHAP analyses allow for reasoning at the individual level. 
<xref ref-type="fig" rid="figure9">Figure 9</xref> provides a visualization of SHAP values for 10 patients who were predicted to develop diabetic MAFLD. The heat map shows that diabetes mellitus and high hemoglobin A<sub>1c</sub> diagnoses were consistently important predictors for most patients in this group, including patients B, G, and H. We also observed significant intraphenotype variability among patients. For example, the prediction for patient J was also significantly influenced by BMI and waist circumference, whereas triglycerides were a more important factor for patient C. The interpatient variability can help physicians better understand the impact of different factors at the individual level and thereby support personalized care and treatment planning.</p><fig position="float" id="figure9"><label>Figure 9.</label><caption><p>Heat map of sample patients with the predicted phenotype diabetic metabolic dysfunction&#x2013;associated fatty liver disease (diabetic MAFLD) and top 15 features. AST/GOT: aspartate aminotransferase/glutamic-oxaloacetic transaminase; HbA<sub>1c</sub>: hemoglobin A<sub>1c</sub>; VLDL: very-low-density lipoprotein.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e75747_fig09.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The proposed method leverages deep learning to estimate MAFLD phenotypes among adults, using graph representation learning and contrastive learning. It provides several methodological novelties that can advance medical informatics research and enhance clinical decision-making for improved patient management. The evaluation results establish its predictive efficacy, demonstrate the value of combining clinical and survey-based data, and underscore the importance of intraphenotype variability and disease dynamics for MAFLD phenotype prediction. 
Furthermore, this method is generalizable and can be applied to other prediction tasks in similar clinical scenarios (eg, gauging the risk of diabetes or CVD) that feature multisource data, individual heterogeneities, intraclass variance, and intervariable relationships.</p><p>Using the proposed method, physicians will be able to identify individuals at higher risk of fibrosis and generate timely alerts for effective patient-centric care [<xref ref-type="bibr" rid="ref83">83</xref>], which can mitigate the likelihood of significant disease progression and serious patient outcomes. Accurate prediction of MAFLD phenotypes also helps reduce hepatic and extrahepatic complications such as CVD, chronic kidney disease [<xref ref-type="bibr" rid="ref16">16</xref>], hepatocellular carcinoma [<xref ref-type="bibr" rid="ref6">6</xref>], osteoporosis, endocrine disorders, and cognitive impairment [<xref ref-type="bibr" rid="ref84">84</xref>]. The proposed method is capable of distinguishing high-risk versus low-risk adults on the basis of pathogenesis, spanning lifestyle, genetic, and metabolic factors; as a result, the likelihood of fibrosis or cirrhosis can be reduced, with broad implications for precision medicine and drug development [<xref ref-type="bibr" rid="ref85">85</xref>]. In a related sense, its ability to predict phenotypes in an accurate and timely manner also enables personalized surveillance, treatment choice assessments, lifestyle changes, and treatment planning.</p><p>Although the proposed method does not achieve an objectively high <italic>F</italic><sub>1</sub>-score for MAFLD phenotypes, it still offers meaningful improvements over prevalent methods, even in the presence of the inherent challenges created by highly imbalanced patient clinical data. In our sample, most adults were in the non-MAFLD category, and few had MAFLD phenotypes, which made model training difficult for every method we investigated. 
This challenge is common to many clinical settings and has been documented across different patient outcome or risk prediction tasks. For example, recent related studies report <italic>F</italic><sub>1</sub>-scores in the range between 0.10 and 0.51 for minority classes [<xref ref-type="bibr" rid="ref86">86</xref>,<xref ref-type="bibr" rid="ref87">87</xref>]. Despite this persistent difficulty, the proposed method consistently outperformed all the benchmarks on MAFLD phenotypes (minority classes), which are clinically important. Hence, the observed improvements with our method represent valuable advances [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref88">88</xref>].</p><p>We illustrate the clinical use of the proposed method as a proactive risk stratification approach for clinical decision support and patient management. In stage 1, it estimates the probability of a person developing MAFLD within 1 year. To flag individuals as high risk, a physician can use the probability to select a decision threshold for balancing the trade-off between precision (the proportion of flagged individuals who are truly at high risk) and recall (the proportion of all truly high-risk individuals who are flagged as high risk). If the physician prefers high certainty, they can choose a high threshold value. For example, our post hoc analysis showed that by setting the threshold to 0.60, the proposed method&#x2019;s precision increased to 0.777, that is, approximately 78% of flagged patients indeed developed MAFLD. By choosing an even higher threshold value of 0.70, its precision further increased to 0.820, although at the cost of reduced sensitivity (recall of 0.59). 
As a result, the physician can identify adults who should be monitored more closely (for example, a semiannual follow-up instead of an annual follow-up), need immediate lifestyle counseling, or require proactive baseline liver function tests to track changes over time.</p><p>Furthermore, the proposed method provides additional insights based on the stage 2 estimate, which can support personalized planning and care. In general, obtaining clinically meaningful precision requires a higher threshold value. For example, with a threshold value of 0.50, the proposed method&#x2019;s precision reached 0.506 for nondiabetic MAFLD and 0.500 for diabetic MAFLD. Emphasizing high-probability instances with a threshold value of 0.70 increased the precision to 0.762 and 0.778, respectively, which would allow physicians to tailor management strategies for adults whose phenotype can be predicted with higher confidence. Additionally, physicians can leverage the instance-level SHAP analysis, as depicted in <xref ref-type="fig" rid="figure9">Figure 9</xref>, to identify the specific factors that drive patient risk. While these insights do not directly indicate a definitive diagnosis, they can still guide physicians to engage in preventive care through patient risk stratification, while coping with the challenge of precise phenotype classification. Overall, physicians can adopt an appropriate threshold value to balance precision and recall while minimizing the likelihood of missing at-risk individuals for proactive stratification.</p><p>In summary, a multiview architecture leverages complementary information from lifestyle, genetic, and clinical data perspectives for richer representations that help distinguish infrequent yet clinically important MAFLD phenotypes, without sacrificing interpretability. The 2-stage design offers flexibility and additional utility. 
Accurate and robust estimates in stage 1 help physicians assess whether or not an individual is likely to develop MAFLD for initial screening purposes. In addition to that determination, even a moderate improvement in the <italic>F</italic><sub>1</sub>-score in stage 2 can facilitate physicians&#x2019; decision-making by providing additional information and clinical insights. These valuable risk stratification capabilities enable physicians to identify high-risk adults who may need close monitoring or alternative treatments. The 2-stage design also offers beneficial flexibility. Physicians can adjust their focus across the first or second stage, depending on their objective (eg, early screening, risk stratification, or intervention planning). According to 2 experienced hepatologists (who wish to remain anonymous), &#x201C;Early, better estimates of individuals&#x2019; likelihood of MAFLD is valuable clinically,&#x201D; and &#x201C;The use of data-driven analytics methods to predict MAFLD phenotypes can enhance clinical decision-making and personalized patient management&#x201D; (September 2, 2025). These expert inputs affirm the clinical value and practicality of our proposed method.</p></sec><sec id="s4-2"><title>Limitations and Research Directions</title><p>This study has several limitations, and it can be extended by further research. First, we used a sample from a single healthcare organization, which offered relatively limited diversity in terms of data sources and patient populations. In a related sense, our sample was imbalanced in the outcome class distribution, which constrained the prediction performance for minority classes, as reflected by the relatively low <italic>F</italic><sub>1</sub>-scores, which is in line with previous research [<xref ref-type="bibr" rid="ref87">87</xref>]. 
Future studies should consider additional data sources and types such as image and text [<xref ref-type="bibr" rid="ref89">89</xref>] to extend the proposed method, use different patient cohorts to affirm its efficacy, and apply synthetic data augmentation or multimodal foundation models to better address the issue of imbalanced outcome class distribution with cross-modal learning capabilities [<xref ref-type="bibr" rid="ref62">62</xref>]. Second, because intraphenotype variability introduces complexity with regard to achieving compact clusters in the embedding space, a trade-off arises between variability and compactness, which could restrict the predictive utility for large datasets or different diseases. Therefore, we call for efforts to explore an optimal balance of variability and compactness for both accuracy and generalizability, such as clustering-based contrastive learning [<xref ref-type="bibr" rid="ref90">90</xref>]. Third, the proposed 2-stage method provides some limited interpretability, through a feature attribution&#x2013;based approach (ie, SHAP); its contrastive pretraining component deserves further exploration for greater transferability and interpretability. Ongoing efforts could facilitate and interpret embeddings in focal clinical contexts. Fourth, an international, multisociety Delphi process led to the proposal of metabolic dysfunction&#x2013;associated steatotic liver disease (MASLD) in 2023 [<xref ref-type="bibr" rid="ref91">91</xref>]. Although our findings might be extrapolated to adults with MASLD [<xref ref-type="bibr" rid="ref92">92</xref>], the proposed method should be extended with research that tests for differences between MAFLD and MASLD and refines the proposed method to ensure robustness and prediction performance.</p></sec><sec id="s4-3"><title>Conclusion</title><p>Predicting MAFLD phenotypes among adults is crucial, but existing analytic methods overlook its multisystem nature and phenotypic heterogeneity. 
As a solution, we developed a novel method that leverages graph representation learning, multiview contrastive pretraining, and a 2-stage estimation design to produce effective predictions that reflect phenotypic heterogeneity, complex relationships, and disease dynamics. It is effective in identifying at-risk adults and thus offers support for clinical decision-making and personalized care. This study reveals a promising pathway to advance health informatics research and clinical practice by leveraging rich, detailed clinical data in electronic health records and survey-based data to predict MAFLD phenotypes.</p></sec></sec></body><back><ack><p>This work was partially supported by the Chang Gung Memorial Hospital Research Project (CRRPG2H0061-5).</p></ack><notes><sec><title>Data Availability</title><p>The data used in this study cannot be made publicly accessible, because the patient consent that we obtained does not articulate data access by other institutions and individuals.</p><p>The authors can arrange data access upon request.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ATN</term><def><p>adaptive temperature network</p></def></def-item><def-item><term id="abb2">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb3">CVD</term><def><p>cardiovascular disease</p></def></def-item><def-item><term id="abb4">DT</term><def><p>decision tree</p></def></def-item><def-item><term id="abb5">GAT</term><def><p>graph attention network</p></def></def-item><def-item><term id="abb6">GCN</term><def><p>graph convolutional network</p></def></def-item><def-item><term id="abb7">MAFLD</term><def><p>metabolic dysfunction&#x2013;associated fatty liver disease</p></def></def-item><def-item><term id="abb8">MASLD</term><def><p>metabolic dysfunction&#x2013;associated steatotic liver disease</p></def></def-item><def-item><term 
id="abb9">MLP</term><def><p>multilayer perceptron</p></def></def-item><def-item><term id="abb10">NN</term><def><p>neural network</p></def></def-item><def-item><term id="abb11">RF</term><def><p>random forest</p></def></def-item><def-item><term id="abb12">SHAP</term><def><p>Shapley additive explanation</p></def></def-item><def-item><term id="abb13">t-SNE</term><def><p>t-distributed stochastic neighbor embedding</p></def></def-item><def-item><term id="abb14">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>D</given-names> </name><name name-style="western"><surname>Konyn</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sandhu</surname><given-names>KK</given-names> </name><name name-style="western"><surname>Dennis</surname><given-names>BB</given-names> </name><name name-style="western"><surname>Cheung</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Ahmed</surname><given-names>A</given-names> </name></person-group><article-title>Metabolic dysfunction-associated fatty liver disease is associated with increased all-cause mortality in the United States</article-title><source>J Hepatol</source><year>2021</year><month>12</month><volume>75</volume><issue>6</issue><fpage>1284</fpage><lpage>1291</lpage><pub-id pub-id-type="doi">10.1016/j.jhep.2021.07.035</pub-id><pub-id pub-id-type="medline">34380057</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Devarbhavi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Asrani</surname><given-names>SK</given-names> </name><name 
name-style="western"><surname>Arab</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Nartey</surname><given-names>YA</given-names> </name><name name-style="western"><surname>Pose</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kamath</surname><given-names>PS</given-names> </name></person-group><article-title>Global burden of liver disease: 2023 update</article-title><source>J Hepatol</source><year>2023</year><month>08</month><volume>79</volume><issue>2</issue><fpage>516</fpage><lpage>537</lpage><pub-id pub-id-type="doi">10.1016/j.jhep.2023.03.017</pub-id><pub-id pub-id-type="medline">36990226</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Younossi</surname><given-names>ZM</given-names> </name><name name-style="western"><surname>Blissett</surname><given-names>D</given-names> </name><name name-style="western"><surname>Blissett</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The economic and clinical burden of nonalcoholic fatty liver disease in the United States and Europe</article-title><source>Hepatology</source><year>2016</year><month>11</month><volume>64</volume><issue>5</issue><fpage>1577</fpage><lpage>1586</lpage><pub-id pub-id-type="doi">10.1002/hep.28785</pub-id><pub-id pub-id-type="medline">27543837</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gofton</surname><given-names>C</given-names> </name><name name-style="western"><surname>Upendran</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zheng</surname><given-names>MH</given-names> </name><name name-style="western"><surname>George</surname><given-names>J</given-names> </name></person-group><article-title>MAFLD: How is it 
different from NAFLD?</article-title><source>Clin Mol Hepatol</source><year>2023</year><month>02</month><volume>29</volume><issue>Suppl</issue><fpage>S17</fpage><lpage>S31</lpage><pub-id pub-id-type="doi">10.3350/cmh.2022.0367</pub-id><pub-id pub-id-type="medline">36443926</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>De</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ahmad</surname><given-names>N</given-names> </name><name name-style="western"><surname>Mehta</surname><given-names>M</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>P</given-names> </name><name name-style="western"><surname>Duseja</surname><given-names>A</given-names> </name></person-group><article-title>NAFLD vs. MAFLD - it is not the name but the disease that decides the outcome in fatty liver</article-title><source>J Hepatol</source><year>2022</year><month>02</month><volume>76</volume><issue>2</issue><fpage>475</fpage><lpage>477</lpage><pub-id pub-id-type="doi">10.1016/j.jhep.2021.09.002</pub-id><pub-id pub-id-type="medline">34530064</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>DQ</given-names> </name><name name-style="western"><surname>El-Serag</surname><given-names>HB</given-names> </name><name name-style="western"><surname>Loomba</surname><given-names>R</given-names> </name></person-group><article-title>Global epidemiology of NAFLD-related HCC: trends, predictions, risk factors and prevention</article-title><source>Nat Rev Gastroenterol Hepatol</source><year>2021</year><month>04</month><volume>18</volume><issue>4</issue><fpage>223</fpage><lpage>238</lpage><pub-id pub-id-type="doi">10.1038/s41575-020-00381-6</pub-id><pub-id 
pub-id-type="medline">33349658</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yamamura</surname><given-names>S</given-names> </name><name name-style="western"><surname>Eslam</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kawaguchi</surname><given-names>T</given-names> </name><etal/></person-group><article-title>MAFLD identifies patients with significant hepatic fibrosis better than NAFLD</article-title><source>Liver Int</source><year>2020</year><month>12</month><volume>40</volume><issue>12</issue><fpage>3018</fpage><lpage>3030</lpage><pub-id pub-id-type="doi">10.1111/liv.14675</pub-id><pub-id pub-id-type="medline">32997882</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stefan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Yki-J&#x00E4;rvinen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Neuschwander-Tetri</surname><given-names>BA</given-names> </name></person-group><article-title>Metabolic dysfunction-associated steatotic liver disease: heterogeneous pathomechanisms and effectiveness of metabolism-based treatment</article-title><source>Lancet Diabetes Endocrinol</source><year>2025</year><month>02</month><volume>13</volume><issue>2</issue><fpage>134</fpage><lpage>148</lpage><pub-id pub-id-type="doi">10.1016/S2213-8587(24)00318-8</pub-id><pub-id pub-id-type="medline">39681121</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tampaki</surname><given-names>M</given-names> </name><name name-style="western"><surname>Papatheodoridis</surname><given-names>GV</given-names> </name><name 
name-style="western"><surname>Cholongitas</surname><given-names>E</given-names> </name></person-group><article-title>Management of hepatocellular carcinoma in decompensated cirrhotic patients: a comprehensive overview</article-title><source>Cancers (Basel)</source><year>2023</year><month>02</month><day>18</day><volume>15</volume><issue>4</issue><fpage>1310</fpage><pub-id pub-id-type="doi">10.3390/cancers15041310</pub-id><pub-id pub-id-type="medline">36831651</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dowman</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Armstrong</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Tomlinson</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Newsome</surname><given-names>PN</given-names> </name></person-group><article-title>Current therapeutic strategies in non-alcoholic fatty liver disease</article-title><source>Diabetes Obes Metab</source><year>2011</year><month>08</month><volume>13</volume><issue>8</issue><fpage>692</fpage><lpage>702</lpage><pub-id pub-id-type="doi">10.1111/j.1463-1326.2011.01403.x</pub-id><pub-id pub-id-type="medline">21449949</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moriwaki</surname><given-names>H</given-names> </name></person-group><article-title>Prevention of liver cancer: basic and clinical aspects</article-title><source>Exp Mol Med</source><year>2002</year><month>11</month><day>30</day><volume>34</volume><issue>5</issue><fpage>319</fpage><lpage>325</lpage><pub-id pub-id-type="doi">10.1038/emm.2002.45</pub-id><pub-id pub-id-type="medline">12526094</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eslam</surname><given-names>M</given-names> </name><name name-style="western"><surname>Newsome</surname><given-names>PN</given-names> </name><name name-style="western"><surname>Sarin</surname><given-names>SK</given-names> </name><etal/></person-group><article-title>A new definition for metabolic dysfunction-associated fatty liver disease: an international expert consensus statement</article-title><source>J Hepatol</source><year>2020</year><month>07</month><volume>73</volume><issue>1</issue><fpage>202</fpage><lpage>209</lpage><pub-id pub-id-type="doi">10.1016/j.jhep.2020.03.039</pub-id><pub-id pub-id-type="medline">32278004</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sohn</surname><given-names>W</given-names> </name><name name-style="western"><surname>Kwon</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ryu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>YK</given-names> </name></person-group><article-title>Liver fibrosis in Asians with metabolic dysfunction-associated fatty liver disease</article-title><source>Clin Gastroenterol Hepatol</source><year>2022</year><month>05</month><volume>20</volume><issue>5</issue><fpage>e1135</fpage><lpage>e1148</lpage><pub-id pub-id-type="doi">10.1016/j.cgh.2021.06.042</pub-id><pub-id pub-id-type="medline">34224877</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lim</surname><given-names>TS</given-names> </name><name name-style="western"><surname>Chun</surname><given-names>HS</given-names> 
</name><name name-style="western"><surname>Kim</surname><given-names>SS</given-names> </name><etal/></person-group><article-title>Fibrotic burden in the liver differs across metabolic dysfunction-associated fatty liver disease subtypes</article-title><source>Gut Liver</source><year>2023</year><month>07</month><day>15</day><volume>17</volume><issue>4</issue><fpage>610</fpage><lpage>619</lpage><pub-id pub-id-type="doi">10.5009/gnl220400</pub-id><pub-id pub-id-type="medline">36799062</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Santos</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Valenti</surname><given-names>L</given-names> </name><name name-style="western"><surname>Romeo</surname><given-names>S</given-names> </name></person-group><article-title>Does nonalcoholic fatty liver disease cause cardiovascular disease? Current knowledge and gaps</article-title><source>Atherosclerosis</source><year>2019</year><month>03</month><volume>282</volume><fpage>110</fpage><lpage>120</lpage><pub-id pub-id-type="doi">10.1016/j.atherosclerosis.2019.01.029</pub-id><pub-id pub-id-type="medline">30731283</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>TY</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>RF</given-names> </name><name name-style="western"><surname>Bu</surname><given-names>ZY</given-names> </name><etal/></person-group><article-title>Association of metabolic dysfunction-associated fatty liver disease with kidney disease</article-title><source>Nat Rev Nephrol</source><year>2022</year><month>04</month><volume>18</volume><issue>4</issue><fpage>259</fpage><lpage>268</lpage><pub-id 
pub-id-type="doi">10.1038/s41581-021-00519-y</pub-id><pub-id pub-id-type="medline">35013596</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sakurai</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kubota</surname><given-names>N</given-names> </name><name name-style="western"><surname>Yamauchi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kadowaki</surname><given-names>T</given-names> </name></person-group><article-title>Role of insulin resistance in MAFLD</article-title><source>Int J Mol Sci</source><year>2021</year><month>04</month><day>16</day><volume>22</volume><issue>8</issue><fpage>4156</fpage><pub-id pub-id-type="doi">10.3390/ijms22084156</pub-id><pub-id pub-id-type="medline">33923817</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fukunaga</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nakano</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kawaguchi</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Non-obese MAFLD is associated with colorectal adenoma in health check examinees: a multicenter retrospective study</article-title><source>Int J Mol Sci</source><year>2021</year><month>05</month><day>22</day><volume>22</volume><issue>11</issue><fpage>5462</fpage><pub-id pub-id-type="doi">10.3390/ijms22115462</pub-id><pub-id pub-id-type="medline">34067258</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Ou</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>M</given-names> </name><etal/></person-group><article-title>MAFLD criteria guide the subtyping of patients with fatty liver disease</article-title><source>Risk Manag Healthc Policy</source><year>2021</year><volume>14</volume><fpage>491</fpage><lpage>501</lpage><pub-id pub-id-type="doi">10.2147/RMHP.S285880</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eslam</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sanyal</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>George</surname><given-names>J</given-names> </name><collab>International Consensus Panel</collab></person-group><article-title>MAFLD: a consensus-driven proposed nomenclature for metabolic associated fatty liver disease</article-title><source>Gastroenterology</source><year>2020</year><month>05</month><volume>158</volume><issue>7</issue><fpage>1999</fpage><lpage>2014</lpage><pub-id pub-id-type="doi">10.1053/j.gastro.2019.11.312</pub-id><pub-id pub-id-type="medline">32044314</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chung</surname><given-names>GE</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Yoo</surname><given-names>JJ</given-names> </name><etal/></person-group><article-title>Lean or diabetic subtypes predict increased all-cause and disease-specific mortality in metabolic-associated fatty liver disease</article-title><source>BMC Med</source><year>2023</year><month>01</month><day>4</day><volume>21</volume><issue>1</issue><fpage>4</fpage><pub-id 
pub-id-type="doi">10.1186/s12916-022-02716-3</pub-id><pub-id pub-id-type="medline">36600263</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Xue</surname><given-names>H</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>K</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name></person-group><article-title>Associations of MAFLD and MAFLD subtypes with the risk of the incident myocardial infarction and stroke</article-title><source>Diabetes Metab</source><year>2023</year><month>09</month><volume>49</volume><issue>5</issue><fpage>101468</fpage><pub-id pub-id-type="doi">10.1016/j.diabet.2023.101468</pub-id><pub-id pub-id-type="medline">37586479</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kwon</surname><given-names>OY</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Jang</surname><given-names>Y</given-names> </name></person-group><article-title>The effectiveness of eHealth interventions on lifestyle modification in patients with nonalcoholic fatty liver disease: systematic review and meta-analysis</article-title><source>J Med Internet Res</source><year>2023</year><month>01</month><day>23</day><volume>25</volume><fpage>e37487</fpage><pub-id pub-id-type="doi">10.2196/37487</pub-id><pub-id pub-id-type="medline">36689264</pub-id></nlm-citation></ref><ref 
id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kleiner</surname><given-names>DE</given-names> </name></person-group><article-title>Hepatocellular carcinoma: liver biopsy in the balance</article-title><source>Hepatology</source><year>2018</year><month>07</month><volume>68</volume><issue>1</issue><fpage>13</fpage><lpage>15</lpage><pub-id pub-id-type="doi">10.1002/hep.29831</pub-id><pub-id pub-id-type="medline">29405373</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ronot</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bahrami</surname><given-names>S</given-names> </name><name name-style="western"><surname>Calderaro</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Hepatocellular adenomas: accuracy of magnetic resonance imaging and liver biopsy in subtype classification</article-title><source>Hepatology</source><year>2011</year><month>04</month><volume>53</volume><issue>4</issue><fpage>1182</fpage><lpage>1191</lpage><pub-id pub-id-type="doi">10.1002/hep.24147</pub-id><pub-id pub-id-type="medline">21480324</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kantartzis</surname><given-names>K</given-names> </name><name name-style="western"><surname>Rettig</surname><given-names>I</given-names> </name><name name-style="western"><surname>Staiger</surname><given-names>H</given-names> </name><etal/></person-group><article-title>An extended fatty liver index to predict non-alcoholic fatty liver disease</article-title><source>Diabetes Metab</source><year>2017</year><month>06</month><volume>43</volume><issue>3</issue><fpage>229</fpage><lpage>239</lpage><pub-id 
pub-id-type="doi">10.1016/j.diabet.2016.11.006</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ben-Assuli</surname><given-names>O</given-names> </name><name name-style="western"><surname>Jacobi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Goldman</surname><given-names>O</given-names> </name><etal/></person-group><article-title>Stratifying individuals into non-alcoholic fatty liver disease risk levels using time series machine learning models</article-title><source>J Biomed Inform</source><year>2022</year><month>02</month><volume>126</volume><fpage>103986</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2022.103986</pub-id><pub-id pub-id-type="medline">35007752</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheng</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>YM</given-names> </name><name name-style="western"><surname>Hsieh</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Kao</surname><given-names>JH</given-names> </name></person-group><article-title>Prevalence and clinical outcomes in subtypes of metabolic associated fatty liver disease</article-title><source>J Formos Med Assoc</source><year>2024</year><month>01</month><volume>123</volume><issue>1</issue><fpage>36</fpage><lpage>44</lpage><pub-id pub-id-type="doi">10.1016/j.jfma.2023.07.010</pub-id><pub-id pub-id-type="medline">37491179</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Byrne</surname><given-names>CD</given-names> </name><name name-style="western"><surname>Targher</surname><given-names>G</given-names> </name></person-group><article-title>NAFLD: a multisystem disease</article-title><source>J Hepatol</source><year>2015</year><month>04</month><volume>62</volume><issue>1 Suppl</issue><fpage>S47</fpage><lpage>S64</lpage><pub-id pub-id-type="doi">10.1016/j.jhep.2014.12.012</pub-id><pub-id pub-id-type="medline">25920090</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ghazanfar</surname><given-names>H</given-names> </name><name name-style="western"><surname>Javed</surname><given-names>N</given-names> </name><name name-style="western"><surname>Qasim</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Metabolic dysfunction-associated steatohepatitis and progression to hepatocellular carcinoma: a literature review</article-title><source>Cancers (Basel)</source><year>2024</year><month>03</month><day>20</day><volume>16</volume><issue>6</issue><fpage>1214</fpage><pub-id pub-id-type="doi">10.3390/cancers16061214</pub-id><pub-id pub-id-type="medline">38539547</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>TS</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>IW</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Shyu</surname><given-names>YC</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>YC</given-names> </name><name name-style="western"><surname>Chien</surname><given-names>RN</given-names> 
</name></person-group><article-title>Prognosis of chronic kidney disease in patients with non-alcoholic fatty liver disease: a northeastern Taiwan community medicine research cohort</article-title><source>Biomed J</source><year>2023</year><month>04</month><volume>46</volume><issue>2</issue><fpage>100532</fpage><pub-id pub-id-type="doi">10.1016/j.bj.2022.04.003</pub-id><pub-id pub-id-type="medline">35460926</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Comparison of MAFLD and NAFLD diagnostic criteria in real world</article-title><source>Liver Int</source><year>2020</year><month>09</month><volume>40</volume><issue>9</issue><fpage>2082</fpage><lpage>2089</lpage><pub-id pub-id-type="doi">10.1111/liv.14548</pub-id><pub-id pub-id-type="medline">32478487</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>MN</given-names> </name><name name-style="western"><surname>Han</surname><given-names>K</given-names> </name><name name-style="western"><surname>Yoo</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hwang</surname><given-names>SG</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Ahn</surname><given-names>SH</given-names> </name></person-group><article-title>Diabetic MAFLD is associated with increased risk of hepatocellular carcinoma and mortality in chronic viral hepatitis patients</article-title><source>Int J 
Cancer</source><year>2023</year><month>10</month><day>15</day><volume>153</volume><issue>8</issue><fpage>1448</fpage><lpage>1458</lpage><pub-id pub-id-type="doi">10.1002/ijc.34637</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ballantyne</surname><given-names>CM</given-names> </name></person-group><article-title>Metabolic inflammation and insulin resistance in obesity</article-title><source>Circ Res</source><year>2020</year><month>05</month><day>22</day><volume>126</volume><issue>11</issue><fpage>1549</fpage><lpage>1564</lpage><pub-id pub-id-type="doi">10.1161/CIRCRESAHA.119.315896</pub-id><pub-id pub-id-type="medline">32437299</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kuchay</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Choudhary</surname><given-names>NS</given-names> </name><name name-style="western"><surname>Mishra</surname><given-names>SK</given-names> </name></person-group><article-title>Pathophysiological mechanisms underlying MAFLD</article-title><source>Diabetes Metab Syndr: Clin Res Rev</source><year>2020</year><month>11</month><volume>14</volume><issue>6</issue><fpage>1875</fpage><lpage>1887</lpage><pub-id pub-id-type="doi">10.1016/j.dsx.2020.09.026</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stefano</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Duarte</surname><given-names>SMB</given-names> </name><name name-style="western"><surname>Ribeiro Leite Altikes</surname><given-names>RG</given-names> </name><name 
name-style="western"><surname>Oliveira</surname><given-names>CP</given-names> </name></person-group><article-title>Non-pharmacological management options for MAFLD: a practical guide</article-title><source>Ther Adv Endocrinol Metab</source><year>2023</year><volume>14</volume><fpage>20420188231160394</fpage><pub-id pub-id-type="doi">10.1177/20420188231160394</pub-id><pub-id pub-id-type="medline">36968655</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lencioni</surname><given-names>R</given-names> </name></person-group><article-title>Loco-regional treatment of hepatocellular carcinoma</article-title><source>Hepatology</source><year>2010</year><month>08</month><volume>52</volume><issue>2</issue><fpage>762</fpage><lpage>773</lpage><pub-id pub-id-type="doi">10.1002/hep.23725</pub-id><pub-id pub-id-type="medline">20564355</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>CT</given-names> </name><name name-style="western"><surname>Chu</surname><given-names>TW</given-names> </name><name name-style="western"><surname>Jang</surname><given-names>JSR</given-names> </name></person-group><article-title>Current-visit and next-visit prediction for fatty liver disease with a large-scale dataset: model development and performance comparison</article-title><source>JMIR Med Inform</source><year>2021</year><month>08</month><day>12</day><volume>9</volume><issue>8</issue><fpage>e26398</fpage><pub-id pub-id-type="doi">10.2196/26398</pub-id><pub-id pub-id-type="medline">34387552</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wong</surname><given-names>VWS</given-names> </name><name 
name-style="western"><surname>Wong</surname><given-names>GLH</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>RSM</given-names> </name><etal/></person-group><article-title>Beneficial effects of lifestyle intervention in non-obese patients with non-alcoholic fatty liver disease</article-title><source>J Hepatol</source><year>2018</year><month>12</month><volume>69</volume><issue>6</issue><fpage>1349</fpage><lpage>1356</lpage><pub-id pub-id-type="doi">10.1016/j.jhep.2018.08.011</pub-id><pub-id pub-id-type="medline">30142427</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Montemayor</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bouzas</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mascar&#x00F3;</surname><given-names>CM</given-names> </name><etal/></person-group><article-title>Effect of dietary and lifestyle interventions on the amelioration of NAFLD in patients with metabolic syndrome: the FLIPAN study</article-title><source>Nutrients</source><year>2022</year><month>05</month><day>26</day><volume>14</volume><issue>11</issue><fpage>2223</fpage><pub-id pub-id-type="doi">10.3390/nu14112223</pub-id><pub-id pub-id-type="medline">35684022</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Morris</surname><given-names>HL</given-names> </name><etal/></person-group><article-title>Noninvasive diagnosis of nonalcoholic steatohepatitis and advanced liver fibrosis using machine learning methods: comparative study with existing quantitative risk 
scores</article-title><source>JMIR Med Inform</source><year>2022</year><month>06</month><day>6</day><volume>10</volume><issue>6</issue><fpage>e36997</fpage><pub-id pub-id-type="doi">10.2196/36997</pub-id><pub-id pub-id-type="medline">35666557</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jia</surname><given-names>X</given-names> </name><name name-style="western"><surname>Baig</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Mirza</surname><given-names>F</given-names> </name><name name-style="western"><surname>GholamHosseini</surname><given-names>H</given-names> </name></person-group><article-title>A Cox-based risk prediction model for early detection of cardiovascular disease: identification of key risk factors for the development of a 10-year CVD risk prediction</article-title><source>Adv Prev Med</source><year>2019</year><volume>2019</volume><fpage>8392348</fpage><pub-id pub-id-type="doi">10.1155/2019/8392348</pub-id><pub-id pub-id-type="medline">31093375</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ksi&#x0105;&#x017C;ek</surname><given-names>W</given-names> </name><name name-style="western"><surname>Gandor</surname><given-names>M</given-names> </name><name name-style="western"><surname>P&#x0142;awiak</surname><given-names>P</given-names> </name></person-group><article-title>Comparison of various approaches to combine logistic regression with genetic algorithms in survival prediction of hepatocellular carcinoma</article-title><source>Comput Biol Med</source><year>2021</year><month>07</month><volume>134</volume><fpage>104431</fpage><pub-id pub-id-type="doi">10.1016/j.compbiomed.2021.104431</pub-id><pub-id pub-id-type="medline">34015670</pub-id></nlm-citation></ref><ref 
id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>YJ</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>CH</given-names> </name><etal/></person-group><article-title>Predicting metabolic syndrome with machine learning models using a decision tree algorithm: retrospective cohort study</article-title><source>JMIR Med Inform</source><year>2020</year><month>03</month><day>23</day><volume>8</volume><issue>3</issue><fpage>e17110</fpage><pub-id pub-id-type="doi">10.2196/17110</pub-id><pub-id pub-id-type="medline">32202504</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>CH</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>YJ</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name></person-group><article-title>Development of cost-effective fatty liver disease prediction models in a Chinese population: statistical and machine learning approaches</article-title><source>JMIR Form Res</source><year>2024</year><month>02</month><day>16</day><volume>8</volume><fpage>e53654</fpage><pub-id pub-id-type="doi">10.2196/53654</pub-id><pub-id pub-id-type="medline">38363597</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Huang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Mao</surname><given-names>Y</given-names> </name></person-group><article-title>Predicting the 5-year risk of nonalcoholic fatty liver disease using machine learning models: prospective cohort study</article-title><source>J Med Internet Res</source><year>2023</year><month>09</month><day>12</day><volume>25</volume><fpage>e46891</fpage><pub-id pub-id-type="doi">10.2196/46891</pub-id><pub-id pub-id-type="medline">37698911</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>YS</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>C</given-names> </name><etal/></person-group><article-title>A novel model for predicting fatty liver disease by means of an artificial neural network</article-title><source>Gastroenterol Rep (Oxf)</source><year>2020</year><month>08</month><volume>9</volume><issue>1</issue><fpage>31</fpage><lpage>37</lpage><pub-id pub-id-type="doi">10.1093/gastro/goaa035</pub-id><pub-id pub-id-type="medline">33747524</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Edelson</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>TT</given-names> </name></person-group><article-title>Generalizable prediction of COVID-19 mortality on worldwide patient data</article-title><source>JAMIA Open</source><year>2022</year><month>07</month><volume>5</volume><issue>2</issue><fpage>ooac036</fpage><pub-id 
pub-id-type="doi">10.1093/jamiaopen/ooac036</pub-id><pub-id pub-id-type="medline">35663116</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Franco</surname><given-names>EF</given-names> </name><name name-style="western"><surname>Rana</surname><given-names>P</given-names> </name><name name-style="western"><surname>Cruz</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Performance comparison of deep learning autoencoders for cancer subtype detection using multi-omics data</article-title><source>Cancers (Basel)</source><year>2021</year><month>04</month><day>22</day><volume>13</volume><issue>9</issue><fpage>2013</fpage><pub-id pub-id-type="doi">10.3390/cancers13092013</pub-id><pub-id pub-id-type="medline">33921978</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ruan</surname><given-names>X</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>P</given-names> </name><etal/></person-group><article-title>MSGCL: inferring miRNA-disease associations based on multi-view self-supervised graph structure contrastive learning</article-title><source>Brief Bioinform</source><year>2023</year><month>03</month><day>19</day><volume>24</volume><issue>2</issue><fpage>bbac623</fpage><pub-id pub-id-type="doi">10.1093/bib/bbac623</pub-id><pub-id pub-id-type="medline">36790856</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chowdhury</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> 
</name><name name-style="western"><surname>Li</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Stratifying heart failure patients with graph neural network and transformer using electronic health records to optimize drug response prediction</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>08</month><day>1</day><volume>31</volume><issue>8</issue><fpage>1671</fpage><lpage>1681</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocae137</pub-id><pub-id pub-id-type="medline">38926131</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Peng</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>J</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>H</given-names> </name></person-group><article-title>A novel liver cancer diagnosis method based on patient similarity network and DenseGCN</article-title><source>Sci Rep</source><year>2022</year><volume>12</volume><issue>1</issue><fpage>6797</fpage><pub-id pub-id-type="doi">10.1038/s41598-022-10441-3</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hashem</surname><given-names>S</given-names> </name><name name-style="western"><surname>Esmat</surname><given-names>G</given-names> </name><name name-style="western"><surname>Elakel</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Comparison of machine learning approaches for prediction of advanced liver 
fibrosis in chronic hepatitis C patients</article-title><source>IEEE/ACM Trans Comput Biol Bioinform</source><year>2018</year><month>05</month><day>1</day><volume>15</volume><issue>3</issue><fpage>861</fpage><lpage>868</lpage><pub-id pub-id-type="doi">10.1109/TCBB.2017.2690848</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Yeh</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Hsu</surname><given-names>WD</given-names> </name><etal/></person-group><article-title>Prediction of fatty liver disease using machine learning algorithms</article-title><source>Comput Methods Programs Biomed</source><year>2019</year><month>03</month><volume>170</volume><fpage>23</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1016/j.cmpb.2018.12.032</pub-id><pub-id pub-id-type="medline">30712601</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yuan</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Handling missing values in healthcare data: a systematic review of deep learning-based imputation techniques</article-title><source>Artif Intell Med</source><year>2023</year><month>08</month><volume>142</volume><fpage>102587</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2023.102587</pub-id><pub-id pub-id-type="medline">37316097</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name 
name-style="western"><surname>Kipf</surname><given-names>TN</given-names> </name><name name-style="western"><surname>Welling</surname><given-names>M</given-names> </name></person-group><article-title>Semi-supervised classification with graph convolutional networks</article-title><access-date>2025-09-16</access-date><conf-name>International Conference on Learning Representations (ICLR)</conf-name><conf-date>Apr 24-26, 2017</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://openreview.net/pdf?id=SJU4ayYgl">https://openreview.net/pdf?id=SJU4ayYgl</ext-link></comment></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Veli&#x010D;kovi&#x0107;</surname><given-names>P</given-names> </name><name name-style="western"><surname>Cucurull</surname><given-names>G</given-names> </name><name name-style="western"><surname>Casanova</surname><given-names>A</given-names> </name><name name-style="western"><surname>Romero</surname><given-names>A</given-names> </name><name name-style="western"><surname>Li&#x00F2;</surname><given-names>P</given-names> </name><name name-style="western"><surname>Bengio</surname><given-names>Y</given-names> </name></person-group><article-title>Graph attention networks</article-title><access-date>2025-09-16</access-date><conf-name>International Conference on Learning Representations (ICLR)</conf-name><conf-date>Apr 30 to May 3, 2018</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://openreview.net/pdf?id=rJXMpikCZ">https://openreview.net/pdf?id=rJXMpikCZ</ext-link></comment></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Hamilton</surname><given-names>W</given-names> </name><name name-style="western"><surname>Ying</surname><given-names>Z</given-names> </name><name 
name-style="western"><surname>Leskovec</surname><given-names>J</given-names> </name></person-group><article-title>Inductive representation learning on large graphs</article-title><source>NIPS'17: Proceedings of the 31st International Conference on Neural Information Processing Systems</source><year>2017</year><publisher-name>Curran Associates Inc</publisher-name><pub-id pub-id-type="doi">10.5555/3294771.3294869</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sangha</surname><given-names>V</given-names> </name><name name-style="western"><surname>Khunte</surname><given-names>A</given-names> </name><name name-style="western"><surname>Holste</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Biometric contrastive learning for data-efficient deep learning from electrocardiographic images</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>04</month><day>3</day><volume>31</volume><issue>4</issue><fpage>855</fpage><lpage>865</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocae002</pub-id><pub-id pub-id-type="medline">38269618</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Feng</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Applying contrastive pre-training for depression and anxiety risk prediction in type 2 diabetes patients based on heterogeneous electronic health records: a primary healthcare case study</article-title><source>J Am Med Inform 
Assoc</source><year>2024</year><month>01</month><day>18</day><volume>31</volume><issue>2</issue><fpage>445</fpage><lpage>455</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocad228</pub-id><pub-id pub-id-type="medline">38062850</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>U&#x00E7;ar</surname><given-names>T</given-names> </name><name name-style="western"><surname>Hajiramezanali</surname><given-names>E</given-names> </name><name name-style="western"><surname>Edwards</surname><given-names>L</given-names> </name></person-group><article-title>SubTab: subsetting features of tabular data for self-supervised representation learning</article-title><source>NIPS &#x2019;21: Proceedings of the 35th International Conference on Neural Information Processing Systems</source><year>2021</year><publisher-name>Curran Associates Inc</publisher-name><fpage>18853</fpage><lpage>18865</lpage></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Radford</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Hallacy</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Learning transferable visual models from natural language supervision</article-title><source>arXiv</source><comment>Preprint posted online on Feb 26, 2021</comment><pub-id pub-id-type="doi">10.48550/arXiv.2103.00020</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>K</given-names> </name><name name-style="western"><surname>Fan</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>S</given-names> </name><name name-style="western"><surname>Girshick</surname><given-names>R</given-names> </name></person-group><article-title>Momentum contrast for unsupervised visual representation learning</article-title><conf-name>2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name><conf-date>Jun 13-19, 2020</conf-date><conf-loc>Seattle, WA</conf-loc><fpage>9729</fpage><lpage>9738</lpage><pub-id pub-id-type="doi">10.1109/CVPR42600.2020.00975</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>T</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>D</given-names> </name></person-group><article-title>SimCSE: simple contrastive learning of sentence embeddings</article-title><access-date>2025-09-16</access-date><conf-name>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>Nov 7-11, 2021</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2021.emnlp-main.552">https://aclanthology.org/2021.emnlp-main.552</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2021.emnlp-main.552</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kornblith</surname><given-names>S</given-names> </name><name name-style="western"><surname>Norouzi</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Hinton</surname><given-names>G</given-names> </name></person-group><article-title>A simple framework for contrastive learning of visual representations</article-title><source>arXiv</source><comment>Preprint posted online on Feb 13, 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.2002.05709</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Tian</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Krishnan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Isola</surname><given-names>P</given-names> </name></person-group><article-title>Contrastive multiview coding</article-title><source>arXiv</source><comment>Preprint posted online on Jun 13, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1906.05849</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rao</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Multi-modality risk prediction of cardiovascular diseases for breast cancer cohort in the All of Us Research Program</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>12</month><day>1</day><volume>31</volume><issue>12</issue><fpage>2800</fpage><lpage>2810</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocae199</pub-id><pub-id pub-id-type="medline">39058572</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pasadana</surname><given-names>IA</given-names> </name><name 
name-style="western"><surname>Hartama</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zarlis</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Chronic kidney disease prediction by using different decision tree techniques</article-title><source>J Phys Conf Ser</source><year>2019</year><month>08</month><day>1</day><volume>1255</volume><issue>1</issue><fpage>012024</fpage><pub-id pub-id-type="doi">10.1088/1742-6596/1255/1/012024</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ye</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Y</given-names> </name></person-group><article-title>A tree ensemble-based two-stage model for advanced-stage colorectal cancer survival prediction</article-title><source>Inf Sci</source><year>2019</year><month>02</month><volume>474</volume><fpage>106</fpage><lpage>124</lpage><pub-id pub-id-type="doi">10.1016/j.ins.2018.09.046</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hashem</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Rasmy</surname><given-names>MEM</given-names> </name><name name-style="western"><surname>Wahba</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Shaker</surname><given-names>OG</given-names> </name></person-group><article-title>Single stage and 
multistage classification models for the prediction of liver fibrosis degree in patients with chronic hepatitis C infection</article-title><source>Comput Methods Programs Biomed</source><year>2012</year><month>03</month><volume>105</volume><issue>3</issue><fpage>194</fpage><lpage>209</lpage><pub-id pub-id-type="doi">10.1016/j.cmpb.2011.10.005</pub-id><pub-id pub-id-type="medline">22070853</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zheng</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Multi-modal graph learning for disease prediction</article-title><source>IEEE Trans Med Imaging</source><year>2022</year><month>09</month><volume>41</volume><issue>9</issue><fpage>2207</fpage><lpage>2216</lpage><pub-id pub-id-type="doi">10.1109/TMI.2022.3159264</pub-id><pub-id pub-id-type="medline">35286257</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bugianesi</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gastaldelli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Vanni</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Insulin resistance in non-diabetic patients with non-alcoholic fatty liver disease: sites and mechanisms</article-title><source>Diabetologia</source><year>2005</year><month>04</month><volume>48</volume><issue>4</issue><fpage>634</fpage><lpage>642</lpage><pub-id pub-id-type="doi">10.1007/s00125-005-1682-x</pub-id><pub-id pub-id-type="medline">15747110</pub-id></nlm-citation></ref><ref 
id="ref73"><label>73</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>W</given-names> </name></person-group><article-title>Increase statistical reliability without losing predictive power by merging classes and adding variables</article-title><source>BDIA</source><year>2016</year><month>10</month><volume>1</volume><issue>4</issue><fpage>341</fpage><lpage>348</lpage><pub-id pub-id-type="doi">10.3934/bdia.2016014</pub-id></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nicodemus</surname><given-names>KK</given-names> </name><name name-style="western"><surname>Malley</surname><given-names>JD</given-names> </name></person-group><article-title>Predictor correlation impacts machine learning algorithms: implications for genomic studies</article-title><source>Bioinformatics</source><year>2009</year><month>08</month><day>1</day><volume>25</volume><issue>15</issue><fpage>1884</fpage><lpage>1890</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btp331</pub-id><pub-id pub-id-type="medline">19460890</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>O</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bayrooti</surname><given-names>J</given-names> </name><name name-style="western"><surname>Goodman</surname><given-names>N</given-names> </name></person-group><article-title>Temperature as uncertainty in contrastive 
learning</article-title><source>arXiv</source><comment>Preprint posted online on Oct 8, 2021</comment><pub-id pub-id-type="doi">10.48550/arXiv.2110.04403</pub-id></nlm-citation></ref><ref id="ref76"><label>76</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amrollahi</surname><given-names>F</given-names> </name><name name-style="western"><surname>Shashikumar</surname><given-names>SP</given-names> </name><name name-style="western"><surname>Meier</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ohno-Machado</surname><given-names>L</given-names> </name><name name-style="western"><surname>Nemati</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wardi</surname><given-names>G</given-names> </name></person-group><article-title>Inclusion of social determinants of health improves sepsis readmission prediction models</article-title><source>J Am Med Inform Assoc</source><year>2022</year><month>06</month><day>14</day><volume>29</volume><issue>7</issue><fpage>1263</fpage><lpage>1270</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocac060</pub-id><pub-id pub-id-type="medline">35511233</pub-id></nlm-citation></ref><ref id="ref77"><label>77</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ibrahim</surname><given-names>ZM</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hamoud</surname><given-names>A</given-names> </name><name name-style="western"><surname>Stappen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Dobson</surname><given-names>RJB</given-names> </name><name name-style="western"><surname>Agarossi</surname><given-names>A</given-names> </name></person-group><article-title>On classifying sepsis heterogeneity in the ICU: insight using machine 
learning</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>03</month><day>1</day><volume>27</volume><issue>3</issue><fpage>437</fpage><lpage>443</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz211</pub-id><pub-id pub-id-type="medline">31951005</pub-id></nlm-citation></ref><ref id="ref78"><label>78</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Faghri</surname><given-names>F</given-names> </name><name name-style="western"><surname>Brunn</surname><given-names>F</given-names> </name><name name-style="western"><surname>Dadu</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Identifying and predicting amyotrophic lateral sclerosis clinical subgroups: a population-based machine-learning study</article-title><source>Lancet Digit Health</source><year>2022</year><month>05</month><volume>4</volume><issue>5</issue><fpage>e359</fpage><lpage>e369</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(21)00274-0</pub-id><pub-id pub-id-type="medline">35341712</pub-id></nlm-citation></ref><ref id="ref79"><label>79</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thabtah</surname><given-names>F</given-names> </name><name name-style="western"><surname>Hammoud</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kamalov</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gonsalves</surname><given-names>A</given-names> </name></person-group><article-title>Data imbalance in classification: experimental evaluation</article-title><source>Inf Sci</source><year>2020</year><month>03</month><volume>513</volume><fpage>429</fpage><lpage>441</lpage><pub-id pub-id-type="doi">10.1016/j.ins.2019.11.004</pub-id></nlm-citation></ref><ref id="ref80"><label>80</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Docherty</surname><given-names>M</given-names> </name><name name-style="western"><surname>Regnier</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Capkun</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Development of a novel machine learning model to predict presence of nonalcoholic steatohepatitis</article-title><source>J Am Med Inform Assoc</source><year>2021</year><month>06</month><day>12</day><volume>28</volume><issue>6</issue><fpage>1235</fpage><lpage>1241</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocab003</pub-id><pub-id pub-id-type="medline">33684933</pub-id></nlm-citation></ref><ref id="ref81"><label>81</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van der Maaten</surname><given-names>L</given-names> </name><name name-style="western"><surname>Hinton</surname><given-names>G</given-names> </name></person-group><article-title>Visualizing data using t-SNE</article-title><source>J Mach Learn Res</source><year>2008</year><access-date>2025-09-16</access-date><volume>9</volume><issue>86</issue><fpage>2579</fpage><lpage>2605</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.jmlr.org/papers/v9/vandermaaten08a.html">https://www.jmlr.org/papers/v9/vandermaaten08a.html</ext-link></comment></nlm-citation></ref><ref id="ref82"><label>82</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Lundberg</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SI</given-names> </name></person-group><article-title>A unified approach to interpreting model predictions</article-title><source>arXiv</source><comment>Preprint posted online on May 22, 2017</comment><pub-id 
pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id></nlm-citation></ref><ref id="ref83"><label>83</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bouayad</surname><given-names>L</given-names> </name><name name-style="western"><surname>Padmanabhan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Chari</surname><given-names>K</given-names> </name></person-group><article-title>Can recommender systems reduce healthcare costs? The role of time pressure and cost transparency in prescription choice</article-title><source>Manag Inf Syst Q</source><year>2020</year><month>12</month><day>1</day><volume>44</volume><issue>4</issue><fpage>1859</fpage><lpage>1903</lpage><pub-id pub-id-type="doi">10.25300/MISQ/2020/14435</pub-id></nlm-citation></ref><ref id="ref84"><label>84</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Colognesi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gabbia</surname><given-names>D</given-names> </name><name name-style="western"><surname>De Martin</surname><given-names>S</given-names> </name></person-group><article-title>Depression and cognitive impairment&#x2014;extrahepatic manifestations of NAFLD and NASH</article-title><source>Biomedicines</source><year>2020</year><month>07</month><day>21</day><volume>8</volume><issue>7</issue><fpage>229</fpage><pub-id pub-id-type="doi">10.3390/biomedicines8070229</pub-id><pub-id pub-id-type="medline">32708059</pub-id></nlm-citation></ref><ref id="ref85"><label>85</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fouad</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Palmer</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>M</given-names> 
</name><etal/></person-group><article-title>Redefinition of fatty liver disease from NAFLD to MAFLD through the lens of drug development and regulatory science</article-title><source>J Clin Transl Hepatol</source><year>2022</year><month>04</month><day>28</day><volume>10</volume><issue>2</issue><fpage>374</fpage><lpage>382</lpage><pub-id pub-id-type="doi">10.14218/JCTH.2021.00408</pub-id><pub-id pub-id-type="medline">35528969</pub-id></nlm-citation></ref><ref id="ref86"><label>86</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>A</given-names> </name><name name-style="western"><surname>Song</surname><given-names>X</given-names> </name><name name-style="western"><surname>Weiskopf</surname><given-names>NG</given-names> </name><name name-style="western"><surname>Chiang</surname><given-names>MF</given-names> </name><name name-style="western"><surname>Hribar</surname><given-names>MR</given-names> </name></person-group><article-title>Prediction of multiclass surgical outcomes in glaucoma using multimodal deep learning based on free-text operative notes and structured EHR data</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>01</month><day>18</day><volume>31</volume><issue>2</issue><fpage>456</fpage><lpage>464</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocad213</pub-id><pub-id pub-id-type="medline">37964658</pub-id></nlm-citation></ref><ref id="ref87"><label>87</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Masayoshi</surname><given-names>K</given-names> </name><name name-style="western"><surname>Hashimoto</surname><given-names>M</given-names> </name><name name-style="western"><surname>Toda</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Training 
language models for estimating priority levels in ultrasound examination waitlists: algorithm development and validation</article-title><source>JMIR AI</source><year>2025</year><month>07</month><day>22</day><volume>4</volume><fpage>e68020</fpage><pub-id pub-id-type="doi">10.2196/68020</pub-id><pub-id pub-id-type="medline">40694843</pub-id></nlm-citation></ref><ref id="ref88"><label>88</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Pang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ling</surname><given-names>W</given-names> </name></person-group><article-title>Are the different MAFLD subtypes based on the inclusion criteria correlated with all-cause mortality?</article-title><source>J Hepatol</source><year>2021</year><month>10</month><volume>75</volume><issue>4</issue><fpage>987</fpage><lpage>989</lpage><pub-id pub-id-type="doi">10.1016/j.jhep.2021.06.013</pub-id><pub-id pub-id-type="medline">34153396</pub-id></nlm-citation></ref><ref id="ref89"><label>89</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>AlSaad</surname><given-names>R</given-names> </name><name name-style="western"><surname>Abd-Alrazaq</surname><given-names>A</given-names> </name><name name-style="western"><surname>Boughorbel</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Multimodal large language models in health care: applications, challenges, and future outlook</article-title><source>J Med Internet Res</source><year>2024</year><month>09</month><day>25</day><volume>26</volume><fpage>e59505</fpage><pub-id 
pub-id-type="doi">10.2196/59505</pub-id><pub-id pub-id-type="medline">39321458</pub-id></nlm-citation></ref><ref id="ref90"><label>90</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Caron</surname><given-names>M</given-names> </name><name name-style="western"><surname>Misra</surname><given-names>I</given-names> </name><name name-style="western"><surname>Mairal</surname><given-names>J</given-names> </name><name name-style="western"><surname>Goyal</surname><given-names>P</given-names> </name><name name-style="western"><surname>Bojanowski</surname><given-names>P</given-names> </name><name name-style="western"><surname>Joulin</surname><given-names>A</given-names> </name></person-group><article-title>Unsupervised learning of visual features by contrasting cluster assignments</article-title><source>arXiv</source><comment>Preprint posted online on Jun 17, 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.2006.09882</pub-id></nlm-citation></ref><ref id="ref91"><label>91</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rinella</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Lazarus</surname><given-names>JV</given-names> </name><name name-style="western"><surname>Ratziu</surname><given-names>V</given-names> </name><etal/></person-group><article-title>A multisociety Delphi consensus statement on new fatty liver disease nomenclature</article-title><source>Hepatology</source><year>2023</year><month>12</month><day>1</day><volume>78</volume><issue>6</issue><fpage>1966</fpage><lpage>1986</lpage><pub-id pub-id-type="doi">10.1097/HEP.0000000000000520</pub-id><pub-id pub-id-type="medline">37363821</pub-id></nlm-citation></ref><ref id="ref92"><label>92</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Younossi</surname><given-names>ZM</given-names> </name><name name-style="western"><surname>Paik</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Stepanova</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ong</surname><given-names>J</given-names> </name><name name-style="western"><surname>Alqahtani</surname><given-names>S</given-names> </name><name name-style="western"><surname>Henry</surname><given-names>L</given-names> </name></person-group><article-title>Clinical profiles and mortality rates are similar for metabolic dysfunction-associated steatotic liver disease and non-alcoholic fatty liver disease</article-title><source>J Hepatol</source><year>2024</year><month>05</month><volume>80</volume><issue>5</issue><fpage>694</fpage><lpage>701</lpage><pub-id pub-id-type="doi">10.1016/j.jhep.2024.01.014</pub-id><pub-id pub-id-type="medline">38286339</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Description and coding of variables.</p><media xlink:href="medinform_v13i1e75747_app1.docx" xlink:title="DOCX File, 32 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Key hyperparameters of the investigated methods.</p><media xlink:href="medinform_v13i1e75747_app2.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material></app-group></back></article>