<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e80163</article-id><article-id pub-id-type="doi">10.2196/80163</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Comprehensive Pediatric Health Risk Stratification Using an AI-Driven Framework in Children Aged 2 to 8 Years: Design and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Chen</surname><given-names>Jundan</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Mao</surname><given-names>Zhihe</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>School of Physical Education, Hunan University of Arts and Science</institution><addr-line>3150 Dongting 
Road</addr-line><addr-line>Changde</addr-line><addr-line>Hunan</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Zhang</surname><given-names>Bo</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Li</surname><given-names>Keqing</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Mahmoud</surname><given-names>Mahmoud Badee Rokaya</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Zhihe Mao, BA, School of Physical Education, Hunan University of Arts and Science, 3150 Dongting Road, Changde, Hunan, 415000, China, 86 13789072016, 86 07367283047; <email>lupoedilagjw@outlook.com</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>all authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>26</day><month>1</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e80163</elocation-id><history><date date-type="received"><day>05</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>18</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>23</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Zhihe Mao, Jundan Chen. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 26.1.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e80163"/><related-article related-article-type="correction-forward" ext-link-type="doi" xlink:href="10.2196/92491" xlink:title="This is a corrected version. See correction statement in" xlink:type="simple">https://medinform.jmir.org/2026/1/e92491</related-article><abstract><sec><title>Background</title><p>Early life health risks can shape long-term morbidity trajectories, yet prevailing pediatric risk assessment paradigms are often fragmented and insufficiently capable of integrating heterogeneous data streams into actionable, individualized profiles.</p></sec><sec><title>Objective</title><p>This study aimed to design, implement, and validate an artificial intelligence&#x2013;driven framework that fuses multimodal pediatric data and leverages advanced natural language processing and ensemble learning to improve early, accurate stratification of key pediatric health risks.</p></sec><sec sec-type="methods"><title>Methods</title><p>A retrospective dataset of over 40,000 pediatric participants aged 2&#x2010;8 years was used to train and evaluate the framework. 
Data were split into training, validation, and test sets (70%, 15%, and 15%, respectively) with a temporally mindful partitioning strategy to approximate prospective evaluation. Baseline comparators included traditional statistical and machine learning models, and the statistical significance of area under the receiver operating characteristic curve (AUC-ROC) differences was assessed using the DeLong test.</p></sec><sec sec-type="results"><title>Results</title><p>The proposed Bidirectional Encoder Representations From Transformers&#x2013;based model achieved an AUC-ROC of 0.85 (95% CI 0.82&#x2010;0.88), sensitivity of 0.78, specificity of 0.80, and <italic>F</italic><sub>1</sub>-score of 0.75 on the test set, outperforming multiple baseline models. In an additional manual comparison evaluation, automated and expert assessments aligned with 78% accuracy (78/100), and most discrepancies arose in &#x201C;equivalent&#x201D; cases.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study provides a validated, artificial intelligence&#x2013;driven, multimodal pediatric health risk stratification framework that translates heterogeneous child health data into clinically actionable risk profiles, demonstrating strong discriminative performance and meaningful agreement with expert assessment. 
The framework supports proactive, individualized pediatric care and offers a scalable foundation for further validation across broader populations and longitudinal follow-up.</p></sec></abstract><kwd-group><kwd>pediatric health</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>risk stratification</kwd><kwd>ensemble learning</kwd><kwd>framework design</kwd><kwd>multisource data fusion</kwd><kwd>predictive modeling</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The assessment and early identification of health risks in children represent a critical area of research with profound implications for individual lifelong well-being and public health strategies. Establishing healthy developmental trajectories during childhood is fundamental, as health conditions and risk exposures in early life can significantly influence adult health outcomes and susceptibility to chronic diseases [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Traditional pediatric health surveillance often relies on periodic checkups, which may not fully capture the dynamic and multifaceted nature of health risks influenced by a complex interplay of genetic, environmental, and lifestyle factors [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Therefore, developing advanced, data-driven methodologies, such as those explored in recent research on artificial intelligence (AI) and machine learning (ML) for tasks like toxicity prediction and broader research applications, to proactively identify and stratify these risks is of paramount importance. 
Such systems, potentially incorporating guardian-interactive elements, can empower health care providers and families with actionable insights for timely preventive interventions, ultimately contributing to a healthier future generation and optimizing health care resource allocation [<xref ref-type="bibr" rid="ref5">5</xref>]. Global health initiatives increasingly emphasize the importance of leveraging innovative technologies, including advanced AI models, such as large language models adapted for specific domains [<xref ref-type="bibr" rid="ref6">6</xref>], to enhance child health and well-being. The potential for deep learning to enhance diagnostic and prognostic capabilities in areas like child psychiatry further underscores this trend [<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>The rapid popularization of internet-based medical services, including telehealth, mobile health (mHealth) apps, and online consultation platforms, has significantly reshaped health care delivery and accessibility worldwide [<xref ref-type="bibr" rid="ref8">8</xref>]. This digital transformation has not only provided patients and caregivers with more convenient access to medical advice and health information but also led to the generation of vast amounts of digital health data [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. While much of the initial adoption has been observed in adult care, the principles of leveraging digital platforms for health monitoring, data collection, and remote consultation, often supported by automated systems for tasks like information synthesis [<xref ref-type="bibr" rid="ref11">11</xref>], are increasingly recognized as valuable in pediatric care. 
The experience gained from the broader implementation of digital health solutions, including aspects of data security, interoperability, and user engagement, offers important lessons for developing effective and safe digital health tools specifically for children [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. The expanding digital health infrastructure provides a rich ecosystem for collecting diverse health-related data that, if properly managed and ethically used with robust frameworks, can be invaluable for comprehensive risk assessment in pediatric populations [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Task-oriented dialogue systems, for instance, show potential for structured data gathering in such contexts. Despite progress in pediatric health surveillance, significant clinical voids persist, particularly in the settings of primary care and population health screening. Current risk assessment paradigms are often fragmented, struggling to effectively integrate heterogeneous data streams, such as structured electronic health record (EHR) data, unstructured clinical notes, parental reports, and real-time wearable sensor data, into a single, cohesive risk profile. This leads to a reactive, rather than proactive, approach to care, where risks are often identified only after they have manifested. 
This research addresses this gap by proposing a framework designed for the following specific end users: (1) pediatricians in primary care, who can use the system for point-of-care decision support; (2) public health officials, who can leverage population-level risk distributions for resource allocation and strategic planning; and (3) clinical care coordinators, who can manage and monitor high-risk pediatric cohorts more effectively.</p><p>The increasing availability of EHR data, administrative data, patient-generated health data, and other health-related information sources has fueled the application of health care big data analytics to improve service quality, operational efficiency, and patient outcomes [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. Effective utilization of these data, supported by primers on data handling and the underlying principles of models like Bidirectional Encoder Representations From Transformers (BERT), can support evidence-based clinical decision-making, enhance population health management, facilitate epidemiological surveillance, and enable the development of personalized health care interventions [<xref ref-type="bibr" rid="ref19">19</xref>]. To enhance clinical utility and facilitate shared decision-making, it is imperative that the reasons behind a child&#x2019;s risk stratification are transparent and interpretable. Shapley additive explanations (SHAP) are used due to their strong theoretical guarantee of consistency and local accuracy, which can provide a unified and reliable framework for model interpretation [<xref ref-type="bibr" rid="ref20">20</xref>]. 
For a given prediction, SHAP assigns an importance value (SHAP value) to each input feature; this feature-level interpretability is crucial for unlocking the full potential of health care data analytics in transforming health care delivery, research, and policy.</p><p>AI algorithms can analyze complex, multidimensional datasets to identify subtle patterns, predict the likelihood of various health conditions, and stratify risk earlier and more accurately than traditional statistical methods [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Researchers have explored various AI-driven systems for health applications. For example, some systems focus on the early detection of developmental disorders using behavioral or imaging data, while others aim to predict adverse outcomes in neonatal intensive care units or identify children at risk for conditions like asthma, obesity, and mental health issues, sometimes using specific BERT-based approaches for analyzing medical records or predicting conditions like Alzheimer disease (AD-BERT), which, while not pediatric, demonstrates domain-specific adaptation [<xref ref-type="bibr" rid="ref22">22</xref>]. Common approaches in developing such systems involve the curation of large, representative datasets; rigorous data preprocessing and feature engineering; the application of appropriate ML algorithms; and robust model validation using independent test sets and ideally prospective clinical evaluation [<xref ref-type="bibr" rid="ref23">23</xref>]. For instance, deep learning models have shown promise in analyzing medical images for pediatric conditions, while ensemble methods and sophisticated scoring mechanisms like BERTScore for evaluating text generation quality [<xref ref-type="bibr" rid="ref24">24</xref>] (analogous to assessing the quality of AI-generated health summaries) are often used to improve the robustness and accuracy of predictive models based on structured EHR data or textual information [<xref ref-type="bibr" rid="ref25">25</xref>]. 
The primary contribution of our framework lies in its advanced natural language processing (NLP) capabilities, which distinguish it from prior AI systems in pediatric care that have largely relied on traditional NLP techniques. Methodologies, such as keyword matching, bag-of-words, and term frequency-inverse document frequency models, while useful, are fundamentally limited in their ability to interpret the complex and nuanced narratives found in clinical texts.</p><p>Therefore, this study aimed to design, implement, and validate a comprehensive, AI-driven framework for pediatric health risk stratification. We hypothesize that by integrating multimodal data, including EHRs, parental questionnaires, and wearable sensor data, and leveraging advanced NLP and ensemble learning models, our system can identify and stratify key pediatric health risks (eg, obesity and developmental delay) with greater accuracy and at an earlier stage than traditional assessment methods, thereby providing robust support for clinical decision-making and pre-emptive intervention. 
A comparison of pediatric risk stratification frameworks is provided in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of selected pediatric risk stratification frameworks.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Study</td><td align="left" valign="top">Key method</td><td align="left" valign="top">Data modalities used</td><td align="left" valign="top">Clinical deployment status</td></tr></thead><tbody><tr><td align="left" valign="top">Smith et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Logistic regression</td><td align="left" valign="top">EHR<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (structured only)</td><td align="left" valign="top">Retrospective validation</td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Random forest</td><td align="left" valign="top">EHR and parental surveys</td><td align="left" valign="top">Prototype on retrospective data</td></tr><tr><td align="left" valign="top">Our study</td><td align="left" valign="top">Fine-tuned BERT<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> + ensemble</td><td align="left" valign="top">EHR, surveys, and wearables</td><td align="left" valign="top">Prototype on retrospective data</td></tr><tr><td align="left" valign="top">Lee et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">CNN<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup> on images</td><td align="left" valign="top">Medical imaging</td><td align="left" valign="top">Conceptual framework</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>EHR: electronic health record.</p></fn><fn id="table1fn2"><p><sup>b</sup>BERT: Bidirectional Encoder Representations From 
Transformers.</p></fn><fn id="table1fn3"><p><sup>c</sup>CNN: convolutional neural network.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Pediatric Health Risk Stratification Framework and Data Foundation</title><p>This section addresses the critical, complex, and multifaceted aspects of multimodal data acquisition, encompassing the collection of diverse data types from various sources and the requisite, often intricate, data preparation processes. These preparation stages, including but not limited to data cleaning, normalization, transformation, and feature engineering, are indispensable for effectively fueling the advanced AI models that form the analytical core of this research, ensuring they operate on high-quality, meaningful inputs to generate reliable and actionable insights [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. The overarching goal is to establish robust data and conceptual groundwork upon which sophisticated risk prediction and stratification can be built, ultimately aiming to transform pediatric health care through proactive and personalized interventions. 
Conceptually, the framework aims to map a complex set of input variables <inline-formula><mml:math id="ieqn1"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mtext>data</mml:mtext></mml:mrow></mml:msub></mml:math></inline-formula> (representing multimodal pediatric data) through a series of transformations and AI modeling (<inline-formula><mml:math id="ieqn2"><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mtext>AI</mml:mtext></mml:mrow></mml:msub></mml:math></inline-formula>) to an interpretable risk stratification outcome <inline-formula><mml:math id="ieqn3"><mml:msub><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mtext>risk</mml:mtext></mml:mrow></mml:msub></mml:math></inline-formula>:</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mtext>risk</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mtext>AI</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtext>Preprocessing</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mtext>data</mml:mtext></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>This high-level representation underscores the journey from raw data to actionable risk assessment.</p></sec><sec id="s2-2"><title>Conceptual Framework for Pediatric Risk Stratification</title><p>The creation of a robust pediatric health risk stratification system requires a meticulously defined conceptual framework that delineates the core components, their interactions, and the overall data flow. 
As depicted in <xref ref-type="fig" rid="figure1">Figure 1</xref>, the proposed AI-driven framework is architected to be modular, scalable, and adaptable to diverse pediatric health contexts. Its primary objective is to convert raw, multisource pediatric data into actionable risk profiles, thereby facilitating proactive and personalized health care interventions.</p><p>Commencing with <italic>multimodal data input</italic>, the framework assimilates a wide array of data sources pertinent to child health. Subsequently, the <italic>data preprocessing and harmonization</italic> layer comes into play. Given the heterogeneity and potential quality issues inherent in real-world pediatric data, such as missing values, inconsistencies, and varying formats from different sources, this layer uses data cleaning, normalization, transformation, and integration techniques to create a unified and analysis-ready dataset.</p><p>Next is the <italic>feature engineering and selection</italic> module, which is dedicated to extracting meaningful features from the preprocessed data that are highly indicative of pediatric health risks. This process may involve creating composite variables, transforming existing features, or applying dimensionality reduction techniques to optimize the input for AI models. Domain knowledge from pediatric medicine is crucial in guiding this process to ensure clinical relevance.</p><p>At the core of the framework lies the <italic>AI-powered risk modeling and stratification engine</italic>. This engine uses advanced ML algorithms, with a particular focus on ensemble learning techniques, as discussed in the Introduction, to construct predictive models for various pediatric health risks. Trained on historical data, these models learn complex patterns and relationships between input features and health outcomes or risk states. 
The output of this engine is not merely a binary prediction but a nuanced risk stratification, which may encompass risk scores, probability estimates, or the identification of distinct risk phenotypes. The determination of weights for each data modality is a critical aspect of our ensemble model. Rather than assigning predetermined, fixed weights, our <italic>stacking</italic> framework uses a data-driven approach.</p><p>The final layers of the framework are devoted to <italic>risk profile generation and visualization</italic> and <italic>clinical decision support and intervention pathways</italic>. The generated risk profiles are designed to be interpretable by health care professionals and understandable by caregivers, often incorporating visual aids and clear explanations. This output is intended to seamlessly integrate with clinical workflows, offering evidence-based insights to support shared decision-making regarding preventive strategies, further assessments, or targeted interventions. <italic>Continuous monitoring</italic> and <italic>model updating</italic> are also integral to the framework, ensuring that the AI models maintain their accuracy and relevance as new information emerges and clinical knowledge evolves [<xref ref-type="bibr" rid="ref31">31</xref>].</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Conceptual diagram of the AI-driven pediatric health risk stratification framework. The image visually represents the modules and their interconnections. AI: artificial intelligence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80163_fig01.png"/></fig></sec><sec id="s2-3"><title>Identification of Key Pediatric Health Domains and Risk Factors</title><p>The primary pediatric health domains identified for this framework, as detailed in <xref ref-type="table" rid="table2">Table 2</xref>, cover a broad spectrum of child well-being. 
<italic>Patient safety</italic> focuses on risks, such as adverse drug events and medical errors, typically informed by EHR data and incident reports. <italic>Continuous health monitoring</italic> is crucial, tracking growth parameters and developmental milestones using EHR data and wearable sensor inputs. For children with ongoing conditions, <italic>chronic disease management</italic> addresses factors like glycemic control or asthma exacerbation frequency, primarily using EHR data and home monitoring device information. <italic>Acute illness severity</italic> is characterized by indicators such as fever severity and respiratory distress, drawing from EHR data and parental questionnaire responses. <italic>Mental health</italic> is a significant domain, evaluating behavioral indicators and emotional well-being via parental, teacher, and direct child assessments. Furthermore, <italic>preventive care</italic> adherence is monitored through risk factors, such as vaccination status and well-child visit compliance, drawing from EHR data and parental questionnaires [<xref ref-type="bibr" rid="ref32">32</xref>]. The interconnected domains of <italic>nutrition</italic> and <italic>growth</italic> consider risks associated with nutritional intake and growth percentiles, also informed by EHR and parental questionnaire data. Lastly, <italic>physical activity</italic> levels and sedentary behavior duration, identified as critical risk factors, are assessed using wearable sensor data and parental questionnaires.</p><p>For each identified health domain, a set of specific, measurable risk factors and associated potential data sources is cataloged. This systematic identification ensures that the subsequent data acquisition and AI modeling efforts are focused and aligned with the goal of comprehensive risk assessment. 
The dynamic nature of these risk factors across different developmental stages (infancy, early childhood, middle childhood, and adolescence) is also a key consideration in the framework&#x2019;s design and application.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Key pediatric health domains, associated risk factors, and potential data sources.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Key pediatric health domains</td><td align="left" valign="top">Associated risk factors</td><td align="left" valign="top">Potential data sources</td></tr></thead><tbody><tr><td align="left" valign="top">Patient safety</td><td align="left" valign="top">Adverse drug events and medical errors</td><td align="left" valign="top">EHR<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> data and incident reports</td></tr><tr><td align="left" valign="top">Health monitoring</td><td align="left" valign="top">Growth parameters and developmental milestones</td><td align="left" valign="top">EHR data and wearable sensor data</td></tr><tr><td align="left" valign="top">Chronic disease management</td><td align="left" valign="top">Glycemic control and asthma exacerbation frequency</td><td align="left" valign="top">EHR data and home monitoring devices</td></tr><tr><td align="left" valign="top">Acute illness severity</td><td align="left" valign="top">Fever severity and respiratory distress indicators</td><td align="left" valign="top">EHR data and parental questionnaire responses</td></tr><tr><td align="left" valign="top">Mental health</td><td align="left" valign="top">Behavioral indicators and emotional well-being</td><td align="left" valign="top">Parental and teacher assessments, and direct child assessments</td></tr><tr><td align="left" valign="top">Preventive care</td><td align="left" valign="top">Vaccination status and well-child visit adherence</td><td align="left" valign="top">EHR data and parental 
questionnaire responses</td></tr><tr><td align="left" valign="top">Nutrition and growth</td><td align="left" valign="top">Nutritional intake and growth percentiles</td><td align="left" valign="top">EHR data and parental questionnaire responses</td></tr><tr><td align="left" valign="top">Physical activity</td><td align="left" valign="top">Activity levels and sedentary behavior duration</td><td align="left" valign="top">Wearable sensor data and parental questionnaire responses</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>EHR: electronic health record.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-4"><title>Multimodal Data Acquisition and Preparation</title><p>The effectiveness of the AI-driven risk stratification framework is fundamentally dependent on the availability of comprehensive, high-quality, multimodal data that accurately reflect the various factors influencing child health. This section details the strategies for acquiring and preparing such data for input into the framework, emphasizing the critical importance of ethical considerations, including informed consent, data privacy, and security, in full compliance with relevant regulations, such as the General Data Protection Regulation (GDPR) and Health Insurance Portability and Accountability Act (HIPAA), adapted for pediatric populations.</p><p>The data sources for this framework are inherently multimodal, encompassing a variety of information types. EHRs serve as a primary source, offering longitudinal clinical data that include diagnoses (ICD codes), procedures, medications, laboratory results, growth chart data, and clinical notes. Access to both structured and unstructured EHR data is essential. 
<italic>Parental and child questionnaires or surveys</italic> are used to capture information often not systematically recorded in EHRs, such as detailed family history, socioeconomic status, lifestyle factors (diet, physical activity, and sleep), environmental exposures, and patient-reported outcomes or symptoms. <italic>Wearable sensors and mHealth data</italic> provide opportunities for continuous, real-world monitoring of physiological parameters (eg, heart rate, activity levels, and sleep patterns) and behavioral data. <italic>School health records</italic> can contribute information on immunizations, health screenings conducted at school, and potentially attendance or behavioral notes relevant to health [<xref ref-type="bibr" rid="ref33">33</xref>]. <italic>Public health and environmental databases</italic> can offer community-level data on factors like air quality, neighborhood socioeconomic indicators, and access to health care or recreational facilities. <italic>Genomic data</italic>, when applicable and ethically sourced, can provide valuable genetic risk scores or specific genetic markers associated with pediatric conditions.</p><p>The data preparation pipeline, illustrated in <xref ref-type="fig" rid="figure2">Figure 2</xref>, involves several key steps to transform raw, multimodal data into an analysis-ready dataset. Initially, data from disparate sources must undergo integration and harmonization. This process includes mapping data elements to common terminologies, resolving inconsistencies, and creating a unified data schema. Data cleaning is then performed to address missing values, correct errors, and remove outliers. Data transformation may be necessary to convert raw data into formats suitable for analysis, such as calculating age-specific <italic>z</italic> scores for growth parameters or deriving summary statistics from time-series sensor data. 
For instance, if <inline-formula><mml:math id="ieqn4"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>w</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is a raw measurement and <inline-formula><mml:math id="ieqn5"><mml:msub><mml:mrow><mml:mi>&#x03BC;</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi><mml:mo>,</mml:mo><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn6"><mml:msub><mml:mrow><mml:mi>&#x03C3;</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi><mml:mo>,</mml:mo><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are the age- and sex-specific mean and SD, respectively, from a reference population, the <italic>z</italic> score can be calculated as follows:</p><disp-formula id="E2"><label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>w</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi><mml:mo>,</mml:mo><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi><mml:mo>,</mml:mo><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>This normalization is crucial for enabling comparisons of measurements across different age groups and sexes. 
Finally, data anonymization or pseudonymization is rigorously applied to protect patient privacy before the data are used for model development. The resulting dataset provides the foundation for the feature engineering and AI modeling stages described in the subsequent sections.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Multimodal data acquisition and preparation pipeline.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80163_fig02.png"/></fig></sec><sec id="s2-5"><title>Data Preprocessing and Feature Engineering</title><p>The integrity and utility of input data are paramount to the performance and clinical relevance of any AI model, particularly in the pediatric domain, where data can be sparse, longitudinal, and highly variable due to growth and development. Pediatric health data, often derived from multiple heterogeneous sources, such as EHRs, parental questionnaires, school health notes, and wearable sensors, typically present unique challenges, including systematically missing values (eg, developmental assessments not performed at certain ages), measurement noise (eg, variability in home-based measurements), and the presence of irrelevant or redundant information that can obscure true risk signals. Therefore, rigorous data preprocessing and thoughtful, domain-informed feature engineering are indispensable steps to prepare a high-quality dataset for subsequent AI modeling.</p><p>The initial phase of preprocessing involves comprehensive data cleaning. Missing data represent a pervasive issue in longitudinal pediatric health care datasets. For instance, growth parameters or developmental screening results might be missing for specific well-child visits. 
While simple imputation techniques like mean, median, and mode imputation can be applied for variables with a low percentage of missingness and random patterns, they often fail to capture the underlying data structure in pediatric cohorts. More sophisticated methods are typically required, such as k-nearest neighbors imputation, which identifies the <inline-formula><mml:math id="ieqn7"><mml:mi>k</mml:mi></mml:math></inline-formula> most similar pediatric cases (based on a suite of other observed features) and imputes the missing value for a feature <inline-formula><mml:math id="ieqn8"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> using a weighted average or majority vote from the neighbors [<xref ref-type="bibr" rid="ref34">34</xref>]. Alternatively, model-based imputation, using algorithms like multivariate imputation by chained equations (MICE), can be used. MICE iteratively models each variable with missing values as a function of other variables in the dataset, cycling through variables until convergence is achieved. 
For a set of variables <inline-formula><mml:math id="ieqn9"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, MICE specifies a conditional model <inline-formula><mml:math id="ieqn10"><mml:mi>P</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2223;</mml:mo><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>&#x03D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfenced></mml:math></inline-formula> for each <inline-formula><mml:math id="ieqn11"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and iteratively samples from these conditional distributions.</p><p>Outlier detection and appropriate treatment are also critical in pediatric data, where extreme values might represent genuine clinical concern or measurement error. Statistical methods, such as the <italic>z</italic> score and IQR, are used. 
For a data point <inline-formula><mml:math id="ieqn12"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> in a feature distribution, its <italic>z</italic> score is calculated as <inline-formula><mml:math id="ieqn13"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03BC;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mi>&#x03C3;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, where <inline-formula><mml:math id="ieqn14"><mml:mi>&#x03BC;</mml:mi></mml:math></inline-formula> is the mean and <inline-formula><mml:math id="ieqn15"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> is the SD. Data points with <inline-formula><mml:math id="ieqn16"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mo>&#x003E;</mml:mo><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> may be flagged. 
The IQR method defines outliers as points falling below <inline-formula><mml:math id="ieqn17"><mml:msub><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mn>1.5</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mi>I</mml:mi><mml:mi>Q</mml:mi><mml:mi>R</mml:mi></mml:math></inline-formula> or above <inline-formula><mml:math id="ieqn18"><mml:msub><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mn>1.5</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mi>I</mml:mi><mml:mi>Q</mml:mi><mml:mi>R</mml:mi></mml:math></inline-formula>, where <inline-formula><mml:math id="ieqn19"><mml:msub><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn20"><mml:msub><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> are the first and third quartiles, respectively, and <inline-formula><mml:math id="ieqn21"><mml:mi>I</mml:mi><mml:mi>Q</mml:mi><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>. Decisions on handling outliers (removal, transformation, or winsorization) are made cautiously, considering the potential clinical significance of extreme values in child health.</p><p>Feature engineering in the pediatric context is a highly domain-driven process focused on creating new, informative features from raw data to capture developmental trajectories, critical exposure periods, and clinically relevant interactions. This enhances model performance and interpretability. 
In child health, this includes deriving age- and sex-adjusted <italic>z</italic> scores for growth parameters, such as height, weight, BMI, and head circumference, based on standardized pediatric growth charts like World Health Organization (WHO) and Centers for Disease Control and Prevention (CDC) growth standards. It also involves creating features like developmental velocity, calculated as <inline-formula><mml:math id="ieqn23"><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mi>D</mml:mi><mml:mo>/</mml:mo><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mfenced separators="|"><mml:mrow><mml:mi>D</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:mfenced><mml:mo>-</mml:mo><mml:mi>D</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:mfenced></mml:mrow></mml:mfenced><mml:mo>/</mml:mo><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:mfenced></mml:math></inline-formula>, reflecting the rate of change in developmental milestone scores between assessments. 
Additionally, it covers quantifying cumulative exposure to risk factors, such as days with poor air quality and screen time during early childhood, generating features for adherence to pediatric guidelines like vaccination completeness and well-child visit schedules, and creating interaction terms reflecting synergistic effects in child health, such as genetic predispositions interacting with environmental exposures.</p><p>To reduce dimensionality, mitigate the risk of overfitting (especially with potentially limited sample sizes in specific pediatric subpopulations), and improve model training efficiency and interpretability, various feature selection techniques are used. These are broadly categorized into filter methods, wrapper methods, and embedded methods. Filter methods evaluate features independently of the AI model, using statistical measures relevant to pediatric outcomes, such as the chi-square test for assessing associations between categorical risk factors (eg, maternal smoking during pregnancy) and a binary child health outcome, and the ANOVA <italic>F</italic> value for assessing the relationship between numerical predictors (eg, birth weight) and different risk groups. Wrapper methods, such as recursive feature elimination, use the performance of a specific AI model to iteratively build and evaluate models with different subsets of features. Embedded methods perform feature selection as an integral part of the model training process. For instance, tree-based algorithms like random forest and gradient boosting inherently provide feature importance scores based on how much each feature contributes to reducing impurity or error across the ensemble of trees. Regularization techniques, such as L1 regularization (Lasso), are particularly useful. 
The Lasso objective function for a linear model predicting a child&#x2019;s health outcome <inline-formula><mml:math id="ieqn25"><mml:mi>y</mml:mi></mml:math></inline-formula> based on features <inline-formula><mml:math id="ieqn26"><mml:mi>X</mml:mi></mml:math></inline-formula> and coefficients <inline-formula><mml:math id="ieqn27"><mml:mi>&#x03B2;</mml:mi></mml:math></inline-formula> is as follows:</p><disp-formula id="E3"><label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>Lasso</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:mi>&#x03BB;</mml:mi><mml:munderover><mml:mo 
movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn28"><mml:mi>&#x03BB;</mml:mi></mml:math></inline-formula> is the regularization parameter that controls the penalty on the sum of absolute values of the coefficients, effectively shrinking less important feature coefficients to zero. The selection of <inline-formula><mml:math id="ieqn29"><mml:mi>&#x03BB;</mml:mi></mml:math></inline-formula> is typically done via cross-validation.</p><p>To provide an overview of the preprocessing pipeline and feature engineering strategies across different data modalities, the key steps are summarized in <xref ref-type="table" rid="table3">Table 3</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Summary of pediatric data preprocessing steps and feature engineering strategies.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Data type</td><td align="left" valign="top">Preprocessing methods</td><td align="left" valign="top">Feature engineering</td><td align="left" valign="top">Examples</td></tr></thead><tbody><tr><td align="left" valign="top">Growth indicators</td><td align="left" valign="top">Missing data imputation (KNN<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> and MICE<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup>)</td><td align="left" valign="top"><italic>z</italic> score calculation and velocity features</td><td align="left" valign="top">Age- and sex-adjusted <italic>z</italic> scores</td></tr><tr><td align="left" valign="top">Early exposure</td><td align="left" 
valign="top">Outlier detection (<italic>z</italic> score and IQR)</td><td align="left" valign="top">Cumulative exposure and interaction terms</td><td align="left" valign="top">Poor air quality days and screen time</td></tr><tr><td align="left" valign="top">Questionnaires</td><td align="left" valign="top">Data cleaning and encoding</td><td align="left" valign="top">One-hot encoding and label encoding</td><td align="left" valign="top">Vaccination scores and visit adherence</td></tr><tr><td align="left" valign="top">School records</td><td align="left" valign="top">Data integration</td><td align="left" valign="top">Feature extraction</td><td align="left" valign="top">Immunization records and attendance</td></tr><tr><td align="left" valign="top">Wearables</td><td align="left" valign="top">Noise reduction and smoothing</td><td align="left" valign="top">Activity patterns and sleep metrics</td><td align="left" valign="top">Activity levels and sleep duration</td></tr><tr><td align="left" valign="top">Genomics</td><td align="left" valign="top">Data anonymization</td><td align="left" valign="top">Genetic risk scoring</td><td align="left" valign="top">Genetic predispositions</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>KNN: k-nearest neighbors.</p></fn><fn id="table3fn2"><p><sup>b</sup>MICE: multivariate imputation by chained equations.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-6"><title>Risk Stratification Algorithm</title><p>The process of risk stratification involves translating the continuous risk probabilities or composite risk scores generated by AI models into distinct, actionable risk categories that are meaningful in a clinical context. This is achieved by defining specific thresholds that convert model outputs into discrete risk strata. 
The thresholds are determined through a combination of clinical expertise, statistical methods, and decision theory principles, ensuring they are both clinically relevant and statistically valid. Let <inline-formula><mml:math id="ieqn30"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>R</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> denote the predicted risk probability, where <italic>X</italic><sub><italic>child</italic></sub> represents the feature vector for a specific child. For a 4-tier stratification system, the risk categories can be defined as follows:</p><list list-type="bullet"><list-item><p>Low risk: <inline-formula><mml:math id="ieqn31"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>R</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x003C;</mml:mo><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p>Moderate risk: <inline-formula><mml:math id="ieqn32"><mml:mstyle><mml:mrow><mml:mstyle 
displaystyle="false"><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>R</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x003C;</mml:mo><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p>High risk: <inline-formula><mml:math id="ieqn33"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>R</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x003C;</mml:mo><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p>Very high risk: <inline-formula><mml:math id="ieqn34"><mml:mstyle><mml:mrow><mml:mstyle 
displaystyle="false"><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>R</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>k</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2265;</mml:mo><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item></list><p>The selection of the thresholds <inline-formula><mml:math id="ieqn35"><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:mfenced></mml:math></inline-formula> is guided by clinical expertise, statistical methods, and decision theory. Pediatric specialists define thresholds based on established clinical guidelines, acceptable risk levels for specific age groups, and the availability and efficacy of preventive interventions. Statistical methods, such as using percentiles of the predicted risk distribution in a well-characterized pediatric reference population, can also be used. For example, thresholds might be set to correspond to the 50th, 80th, and 95th percentiles of this distribution. Decision theory principles can further refine threshold optimization by considering a utility function that balances the costs and benefits associated with true positives, false positives, true negatives, and false negatives. 
This might involve analyzing metrics like Youden J statistic (J=sensitivity+specificity&#x2212;1) or finding points on the precision-recall curve that correspond to desired tradeoffs for pediatric screening or intervention programs.</p><p>When the system is designed to predict multiple distinct health risks for a child, such as the risk of obesity, developmental delay, or asthma exacerbation, the framework requires a mechanism to present these risks in a consolidated and understandable manner. This can be achieved by either displaying the stratified risk level for each condition independently or by developing a composite pediatric health vulnerability index (CPHVI). The CPHVI is calculated by assigning weights <inline-formula><mml:math id="ieqn36"><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> to each risk score <italic>S</italic>(<inline-formula><mml:math id="ieqn37"><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>) based on factors, such as the clinical severity of risk <inline-formula><mml:math id="ieqn38"><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, its impact on long-term child development, its prevalence in the target pediatric population, and its responsiveness to an intervention. To translate the CPHVI into clinical practice, we propose a tiered intervention model based on predefined, clinically meaningful thresholds developed in collaboration with pediatric experts. For example, a CPHVI score exceeding a &#x201C;high risk&#x201D; threshold could automatically trigger an alert in the EHR system for the primary care physician, along with a recommendation for a direct referral to a relevant specialist. 
The formula for the CPHVI is as follows:</p><disp-formula id="E4"><label>(4)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>P</mml:mi><mml:mi>H</mml:mi><mml:mi>V</mml:mi><mml:mi>I</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x22C5;</mml:mo><mml:mi>S</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The determination of the weights <inline-formula><mml:math id="ieqn39"><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is a complex task, often requiring expert consensus or data-driven approaches. Another advanced approach involves using unsupervised learning techniques, such as clustering algorithms, applied to the vector of predicted individual risk probabilities for each child. 
This can help identify common co-occurring risk patterns or distinct pediatric subphenotypes that share similar multirisk profiles, guiding more tailored multifaceted interventions.</p><p>To enhance clinical utility and facilitate shared decision-making with families, the reasons behind a child&#x2019;s stratification into a particular risk category must be as transparent and interpretable as possible. Techniques, such as SHAP and Local Interpretable Model-Agnostic Explanations (LIME), are used. For a given prediction <inline-formula><mml:math id="ieqn40"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mtext>child</mml:mtext></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, SHAP assigns an importance value (SHAP value, <inline-formula><mml:math id="ieqn41"><mml:msub><mml:mrow><mml:mi>&#x03D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>) to each input feature <inline-formula><mml:math id="ieqn42"><mml:mi>j</mml:mi></mml:math></inline-formula>, representing its marginal contribution to pushing the prediction away from a baseline. 
The sum of SHAP values for all features plus the baseline prediction equals the model&#x2019;s output for that child:</p><disp-formula id="E5"><label>(5)</label><mml:math id="eqn5"><mml:mi>f</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mtext>child </mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>&#x03D5;</mml:mi></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mrow><mml:munderover><mml:mo stretchy="true">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>&#x200A;</mml:mo></mml:mrow></mml:mrow><mml:msub><mml:mrow><mml:mi>&#x03D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></disp-formula><p>This allows clinicians to understand which specific factors (eg, low birth weight, specific dietary patterns, and lack of physical activity) are most influential in determining a child&#x2019;s assessed risk level, thereby guiding targeted advice and interventions.</p></sec><sec id="s2-7"><title>BERT-Based Knowledge Extraction and NLP Modeling</title><p>BERT-based NLP is used to extract informative representations from unstructured pediatric clinical text (eg, consultation records, clinical notes, and parental narratives), which are subsequently used as input features for pediatric health risk stratification models.</p><p>Pediatric medical journals serve as a vital source of pediatric disease knowledge, encompassing a wealth of information on pediatric diseases, including clinical symptoms, diagnostic methods, treatment approaches, and prognoses. These journals are written by pediatric experts and researchers and are based on extensive clinical practice and scientific research. 
They contain detailed case studies, research findings, and expert opinions, providing a solid foundation for the extraction and application of pediatric disease knowledge. The knowledge derived from these journals can help health care professionals better understand the characteristics and progression of pediatric diseases, thereby improving the quality of pediatric health care services [<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>The pediatric disease knowledge base is a structured repository that integrates information from pediatric medical journals and other authoritative sources. It includes various aspects of pediatric diseases, such as disease names, symptoms, signs, laboratory test results, imaging findings, treatment methods, and prognoses. This knowledge base is designed to provide comprehensive and accurate information on pediatric diseases, supporting clinical decision-making and research. By organizing and structuring the information from pediatric medical journals, the pediatric disease knowledge base enables efficient retrieval and utilization of pediatric disease knowledge, facilitating the application of this knowledge in clinical practice and research.</p><p>The process begins with the ingestion of pediatric medical journal data into our system. These textual data, rich in pediatric disease knowledge, undergo meticulous preprocessing to align with BERT&#x2019;s input requirements. 
The text is tokenized into a sequence of tokens <inline-formula><mml:math id="ieqn43"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, where each <inline-formula><mml:math id="ieqn44"><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents a word or subword unit. Additionally, the pediatric disease knowledge base is incorporated to enhance the model&#x2019;s understanding of pediatric medical concepts. BERT&#x2019;s architecture consists of multiple transformer encoder layers, each equipped with self-attention mechanisms. The input tokens, enriched with special markers like [CLS] and [SEP], pass through these layers, generating a matrix of contextualized embeddings <inline-formula><mml:math id="ieqn45"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. 
These embeddings capture intricate semantic relationships and contextual information within the text, forming the basis for subsequent knowledge extraction tasks.</p><p>To identify key pediatric disease entities and their relationships within the text, named entity recognition and relation extraction techniques are applied to the BERT-generated embeddings. For named entity recognition, a linear layer combined with a conditional random field is used. The conditional random field loss function is defined as follows:</p><disp-formula id="E6"><label>(6)</label><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mtext>NER</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2223;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn46"><mml:mi>N</mml:mi></mml:math></inline-formula> is the number of samples and <inline-formula><mml:math id="ieqn47"><mml:mi>P</mml:mi><mml:mfenced 
separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2223;</mml:mo><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfenced></mml:math></inline-formula> is the probability of the true label sequence <inline-formula><mml:math id="ieqn48"><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> given the input sequence <inline-formula><mml:math id="ieqn49"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. For relation extraction between entities, a classifier is trained using a cross-entropy loss function as follows:</p><disp-formula id="E7"><label>(7)</label><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mtext>RE</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo 
stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn50"><mml:mi>M</mml:mi></mml:math></inline-formula> represents the number of entity pairs, <inline-formula><mml:math id="ieqn51"><mml:mi>C</mml:mi></mml:math></inline-formula> is the number of relation types, and <inline-formula><mml:math id="ieqn52"><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn53"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are the true and predicted probabilities for relation <italic>j</italic> between the entity pair <italic>i</italic>, respectively. The extracted entities and relationships are then integrated into the pediatric disease knowledge base. This knowledge base, enriched with entities and relationships from pediatric medical literature, serves to enhance a medical chatbot&#x2019;s responses. When users input symptoms, the chatbot leverages this knowledge base to generate accurate and contextually relevant diagnostic suggestions and risk assessments.</p><p><xref ref-type="fig" rid="figure3">Figure 3</xref> illustrates a comprehensive workflow where BERT&#x2019;s robust language understanding capabilities are harnessed to extract valuable pediatric disease knowledge from medical texts. This knowledge is subsequently used to empower a medical chatbot, enabling it to deliver precise and informative responses to user inquiries regarding pediatric symptoms. 
The integration of BERT with pediatric medical journals and a pediatric disease knowledge base provides a powerful tool for advancing pediatric health care through improved diagnostic accuracy and personalized treatment recommendations.</p><p>Our knowledge extraction module is built upon BioBERT, a pretrained language model optimized for the biomedical domain. We chose BioBERT as our base model due to its demonstrated strong performance on various biomedical text mining tasks. For fine-tuning, we used a domain-specific corpus comprising over 50,000 articles from leading pediatric medical journals and over 100,000 deidentified clinical notes. The model was fine-tuned for 5 epochs with a learning rate of 2&#x00D7;10<sup>&#x2212;5</sup>. The performance of our fine-tuned model on key NLP tasks was rigorously evaluated on a manually annotated test set of 500 clinical notes, which were dual-annotated by pediatric domain experts to ensure quality.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>BERT-driven pediatric disease knowledge extraction and medical dialogue response framework. T denotes the input token sequence obtained after tokenization of the pediatric clinical text, E represents the contextualized embedding vectors produced by the BERT encoder, and Trm refers to the Transformer encoder layers of the BERT model. BERT: Bidirectional Encoder Representations From Transformers.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80163_fig03.png"/></fig></sec><sec id="s2-8"><title>System Architecture Design</title><p>The successful deployment, scalability, and usability of the AI-driven pediatric health risk stratification framework critically depend on a meticulously planned and well-engineered system architecture. 
This architecture must robustly support efficient multimodal data processing pipelines; reliable and timely execution of complex AI models; secure and compliant management of sensitive pediatric health data; and an intuitive, actionable interface for diverse end users, including pediatricians, specialist clinicians, public health officials, researchers, and potentially, with appropriate safeguards, parents or caregivers. A multitier, service-oriented architecture is proposed to ensure modularity, maintainability, and scalability. This typically comprises the following tiers:</p><list list-type="order"><list-item><p>Data tier: This foundational tier is responsible for the persistent storage and comprehensive management of all relevant pediatric health data. This includes raw input data from diverse sources (EHRs, wearables, and questionnaires), preprocessed and feature-engineered datasets, trained AI models (including their versions and metadata), generated risk profiles for individual children, and audit logs. This tier would likely involve a hybrid database strategy, combining relational databases (MySQL) for structured metadata, patient demographics, and well-defined clinical entities, with potentially NoSQL databases for handling large volumes of heterogeneous, unstructured, or streaming data (like continuous sensor readings). Robust data governance, backup, and recovery mechanisms are integral.</p></list-item><list-item><p>Application logic tier (backend services): This is the computational core of the system, housing the data preprocessing pipelines, sophisticated feature engineering modules, the AI model inference engine, and the risk stratification algorithms. It handles all computational tasks, business logic for risk assessment, and interactions with the data tier. This tier would be developed using scalable and efficient programming languages and appropriate backend frameworks. 
It may be implemented as a set of microservices to enhance scalability and independent deployability of different functionalities.</p></list-item><list-item><p>Presentation tier (frontend interfaces): This tier provides the user interface and user experience for interacting with the system. This could manifest as a secure web-based application accessible via standard browsers, designed with responsive layouts for use on various devices. For specific user groups like parents or children (with age-appropriate design), a dedicated mobile app might be considered. The frontend allows authorized users to view individualized pediatric risk stratification results, explore interactive visualizations of risk factors and trends, access evidence-based decision support information or guideline recommendations, and potentially (for clinicians) trigger further diagnostic or intervention pathways.</p></list-item></list><p>Key functional system modules are designed to support the end-to-end pediatric risk stratification process (<xref ref-type="fig" rid="figure4">Figure 4</xref>).</p><list list-type="order"><list-item><p>Pediatric data ingestion and management module: It securely handles the import of data from diverse, often disparate, pediatric health information systems and sources. It performs initial data validation and schema mapping and manages data storage, versioning, and provenance tracking, which are crucial for longitudinal pediatric studies.</p></list-item><list-item><p>Preprocessing and pediatric feature engineering module: It implements complex algorithms described in the preceding section, tailored for pediatric data. 
This module needs to be highly configurable and extensible to accommodate new data types, evolving pediatric clinical knowledge, or updated feature definitions.</p></list-item><list-item><p>AI model execution and management engine: It loads versioned, trained AI models and performs inference on new or updated pediatric patient data to generate risk predictions. This engine must be optimized for performance and scalability, especially if real-time or near real-time risk assessment is required for acute pediatric conditions. It also manages the model lifecycle, including retraining triggers and performance monitoring.</p></list-item><list-item><p>Pediatric risk stratification and profiling module: It implements the sophisticated algorithms from the preceding section to convert raw model outputs into clinically interpretable risk strata and comprehensive, multidimensional pediatric risk profiles, potentially including age-adjusted interpretations.</p></list-item><list-item><p>Reporting, visualization, and alerting module: It generates customized reports, interactive dashboards (eg, showing population-level pediatric risk distributions and trends over time for specific age cohorts), and dynamic visualizations (eg, individual child risk timelines and feature importance charts for specific predictions). It may also include an alerting mechanism for clinicians when a child&#x2019;s risk profile crosses critical, predefined thresholds.</p></list-item><list-item><p>User authentication, authorization, and audit module: It ensures secure, role-based access to the system and its sensitive pediatric data, adhering strictly to privacy regulations (eg, HIPAA, GDPR, and Children&#x2019;s Online Privacy Protection Act [COPPA]). 
Comprehensive audit trails of data access and system actions are maintained.</p></list-item><list-item><p>Interoperability layer (application programming interfaces [APIs]): It provides well-defined APIs, possibly using standards like Fast Healthcare Interoperability Resources, to allow secure data exchange and interaction with other clinical systems (eg, pediatric EHRs, laboratory information systems, and clinical decision support tools embedded in existing workflows).</p></list-item></list><p>The data flow within the system begins with secure data ingestion from multiple sources into the <italic>data tier</italic>. The <italic>application logic tier</italic> then orchestrates the preprocessing and feature engineering pipelines. The processed features are subsequently fed into the <italic>AI model execution engine</italic> for risk prediction. These predictions are passed to the <italic>risk stratification and profiling module</italic>, and the final, interpretable pediatric risk profiles are stored and made available for secure access and visualization through the <italic>presentation tier</italic> or via the <italic>API layer</italic>. The technology stack will be carefully chosen based on criteria, such as scalability for large pediatric populations, real-time performance requirements, security mandates for child data, ease of development and maintenance, and compatibility with existing health care IT infrastructure. This might include Python with libraries like Pandas, NumPy, Scikit-learn, TensorFlow/Keras, and PyTorch for AI or ML development; web frameworks, such as Django/Flask (Python), Spring Boot (Java), and Node.js, for the backend services; modern JavaScript frameworks like React, Angular, and Vue.js for building responsive and interactive frontend interfaces; and a combination of SQL and NoSQL databases as described for the <italic>data tier</italic>. 
Containerization technologies like Docker and orchestration tools like Kubernetes may be used for deployment and scaling.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Multitier system architecture diagram for pediatric risk stratification. AI: artificial intelligence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80163_fig04.png"/></fig></sec><sec id="s2-9"><title>Prototype System Implementation</title><p>The pediatric health risk stratification prototype system integrates multisource data input with advanced AI models to deliver a comprehensive risk assessment platform. As illustrated in <xref ref-type="fig" rid="figure5">Figure 5</xref>, the system collects diverse data types, including patient demographics, clinical records, and parent-reported outcomes, which serve as the foundation for subsequent analysis and risk assessment. This integration ensures a holistic view of pediatric health, enabling more accurate and nuanced risk predictions.</p><p>At the core of the system is a pretrained BERT model, specifically fine-tuned to handle pediatric health terminology and context. The model processes input text data through multiple Transformer modules, which encode the text to capture complex linguistic patterns and semantic relationships. Feature extraction layers then distill these encoded representations into risk-relevant features. These features are passed to risk assessment components where the actual risk stratification takes place, transforming raw data into actionable insights regarding children&#x2019;s health risks.</p><p>The system&#x2019;s implementation leverages a robust technology stack to ensure efficiency, scalability, and user-friendliness. The backend, developed using Python and its rich library ecosystem, handles data processing and model execution. 
It uses microservices, potentially orchestrated by lightweight frameworks like Flask, to manage distinct functionalities such as data ingestion, preprocessing, and model inference. Data storage follows a hybrid approach, combining PostgreSQL for structured data with MongoDB for unstructured or semistructured content. The frontend, built with modern JavaScript frameworks, offers tailored interfaces for different users, including physicians and parents. It includes features like risk profile visualizations and population-level risk trend dashboards. Docker is used for containerization to ensure smooth deployment and scalability of the application components.</p><p>To illustrate the practical utility of the framework, consider a hypothetical clinical scenario. Dr Smith, a pediatrician, begins her day by logging into the system&#x2019;s dashboard. The system flags one of her patients, a 5-year-old child, for a significant increase in the composite risk score for developing obesity. Dr Smith clicks on the patient&#x2019;s profile and is presented with an interactive, multimodal dashboard. An explainable AI feature, using SHAP values, highlights the primary contributing factors: a recent decrease in physical activity levels captured by wearable data and parent-reported dietary logs indicating high consumption of processed foods. Based on these insights, the system provides Dr Smith with evidence-based, actionable recommendations, including a referral to a pediatric nutritionist and a set of tailored educational materials for the parents. This scenario demonstrates how the framework can transform raw data into clinically actionable insights to facilitate timely and personalized preventive care.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Structure of the pediatric health risk stratification prototype system. 
T denotes the input token sequence obtained after tokenization of the pediatric clinical text, E represents the contextualized embedding vectors produced by the BERT encoder, and Trm refers to the Transformer encoder layers of the BERT model. BERT: Bidirectional Encoder Representations From Transformers; EHR: electronic health record.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80163_fig05.png"/></fig></sec><sec id="s2-10"><title>Experimental Setup</title><p>The primary dataset used for training and validating the AI models included data from over 40,000 pediatric participants, with an age range of 2 to 8 years at the time of data collection or follow-up. The dataset was partitioned into 3 mutually exclusive sets: a training set (70% of the data), a validation set (15%), and an independent holdout test set (15%). To ensure temporal validity if longitudinal data were used, the split was performed such that data from earlier time periods were used for training and validation, while data from later periods were reserved for testing, mimicking a prospective evaluation.</p><p>To benchmark the performance of the proposed AI-driven framework, a selection of robust baseline models was implemented and rigorously evaluated on an identical dataset and predictive task. 
These comparators included established traditional statistical models frequently used in pediatric risk prediction, specifically logistic regression for binary risk outcomes, where the probability of risk <inline-formula><mml:math id="ieqn54"><mml:mi>P</mml:mi><mml:mo>(</mml:mo><mml:mi>Y</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2223;</mml:mo><mml:mi>X</mml:mi><mml:mo>)</mml:mo></mml:math></inline-formula> is modeled as <inline-formula><mml:math id="ieqn55"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>Y</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2223;</mml:mo><mml:mi>X</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, and Cox proportional hazards models in scenarios where time-to-event data for risk onset were available. 
Furthermore, simpler yet effective ML algorithms, namely support vector machine (SVM), configured with various kernels (linear and radial basis function), and standard single decision trees, were also incorporated as baselines. In instances where existing, validated pediatric risk scores or established rule-based systems were pertinent to the specific health outcomes under investigation and applicable to the dataset, these were also included in the comparative analysis to provide a comprehensive performance context. To rigorously assess the statistical significance of our model&#x2019;s superior performance, we used the DeLong test to compare the area under the receiver operating characteristic curve (AUC-ROC) of our proposed model with that of each baseline model.</p></sec><sec id="s2-11"><title>Ethical Considerations</title><p>This study involved research with human participants. The study protocol was reviewed and approved by the Institutional Review Board and Research Ethics Committee of Hunan University of Arts and Science (HUAS-20250504). All procedures were performed in accordance with the ethical standards of the responsible institutional committee and with the principles of the Declaration of Helsinki. Informed consent was obtained from all participants prior to their inclusion in the study. Participants were fully informed about the purpose of the study, the procedures involved, and their right to withdraw at any time without penalty. The privacy and confidentiality of all participants were strictly maintained throughout the study. All data were anonymized prior to analysis, and no personally identifiable information was collected, stored, or reported. 
No financial or material compensation was provided to participants for their participation in this study.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>This section reports the empirical performance of the proposed AI-driven pediatric health risk stratification framework evaluated on the independent test set, in comparison with multiple baseline models.</p><p><xref ref-type="table" rid="table4">Table 4</xref> summarizes the predictive performance of the proposed model and baseline methods across targeted pediatric health risks. The proposed model achieved an AUC-ROC of 0.85 (95% CI 0.82&#x2010;0.88), an area under the precision-recall curve of 0.70 (95% CI 0.65&#x2010;0.75), a sensitivity of 0.78, a specificity of 0.80, and an <italic>F</italic><sub>1</sub>-score of 0.75. In comparison, logistic regression, SVM, random forest, gradient boosting, and a conventional deep learning model yielded lower AUC-ROC and <italic>F</italic><sub>1</sub>-score values.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Key performance metrics of the proposed artificial intelligence model and baseline models for targeted pediatric risks in the test set.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model name</td><td align="left" valign="top">AUC-ROC<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup>, value (95% CI)</td><td align="left" valign="top">AUC-PR<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup>, value (95% CI)</td><td align="left" valign="top">Sensitivity</td><td align="left" valign="top">Specificity</td><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">Brier score</td></tr></thead><tbody><tr><td align="left" valign="top">Proposed BERT<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup> model</td><td align="left" valign="top">0.85 (0.82&#x2010;0.88)</td><td align="left" 
valign="top">0.70 (0.65&#x2010;0.75)</td><td align="left" valign="top">0.78</td><td align="left" valign="top">0.80</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.15</td></tr><tr><td align="left" valign="top">Logistic regression</td><td align="left" valign="top">0.72 (0.68&#x2010;0.76)</td><td align="left" valign="top">0.60 (0.55&#x2010;0.65)</td><td align="left" valign="top">0.65</td><td align="left" valign="top">0.70</td><td align="left" valign="top">0.62</td><td align="left" valign="top">0.20</td></tr><tr><td align="left" valign="top">SVM<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="top">0.75 (0.71&#x2010;0.79)</td><td align="left" valign="top">0.65 (0.60&#x2010;0.70)</td><td align="left" valign="top">0.70</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.66</td><td align="left" valign="top">0.18</td></tr><tr><td align="left" valign="top">Random forest</td><td align="left" valign="top">0.78 (0.74&#x2010;0.82)</td><td align="left" valign="top">0.67 (0.62&#x2010;0.72)</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.69</td><td align="left" valign="top">0.17</td></tr><tr><td align="left" valign="top">Gradient boosting</td><td align="left" valign="top">0.80 (0.77&#x2010;0.83)</td><td align="left" valign="top">0.69 (0.64&#x2010;0.74)</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.76</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.16</td></tr><tr><td align="left" valign="top">Deep learning</td><td align="left" valign="top">0.83 (0.80&#x2010;0.86)</td><td align="left" valign="top">0.72 (0.67&#x2010;0.77)</td><td align="left" valign="top">0.76</td><td align="left" valign="top">0.79</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.14</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>AUC-ROC: area under the 
receiver operating characteristic curve.</p></fn><fn id="table4fn2"><p><sup>b</sup>AUC-PR: area under the precision-recall curve.</p></fn><fn id="table4fn3"><p><sup>c</sup>BERT: Bidirectional Encoder Representations From Transformers.</p></fn><fn id="table4fn4"><p><sup>d</sup>SVM: support vector machine.</p></fn></table-wrap-foot></table-wrap><p>The DeLong test demonstrated that the AUC-ROC of the proposed model was significantly higher than that of each baseline model (all <italic>P</italic>&#x003C;.05). Paired <italic>t</italic> tests on <italic>F</italic><sub>1</sub>-scores similarly indicated statistically significant improvements in predictive accuracy for the proposed model relative to all comparators.</p><p>To further assess alignment with expert judgment, a manual comparison evaluation was conducted using 100 randomly selected pairs of online consultation cases. Automated predictions and expert assessments were concordant in 78 cases, corresponding to an agreement rate of 78%.</p><p>A confusion matrix (<xref ref-type="fig" rid="figure6">Figure 6</xref>) shows that most discrepancies occurred in cases manually labeled as &#x201C;equivalent,&#x201D; whereas predictions for clearly differentiated cases exhibited higher agreement with expert assessments. Misclassifications were predominantly observed within the equivalent category, while the majority of nonequivalent cases were correctly identified.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Artificial intelligence prediction confusion matrix.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80163_fig06.png"/></fig><p>Model performance was further examined across predefined pediatric subgroups, including age categories, sex, and socioeconomic status (<xref ref-type="table" rid="table5">Table 5</xref>). Comparable AUC-ROC and <italic>F</italic><sub>1</sub>-score values were observed across all subgroups. 
For age-based stratification, the AUC-ROC values were 0.84 for children younger than 2 years, 0.85 for those aged 2&#x2010;5 years, and 0.86 for those older than 5 years. Similar performance consistency was observed across sex and socioeconomic strata.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Model performance across different pediatric subgroups for early childhood obesity.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Group</td><td align="left" valign="top">AUC-ROC<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup>, value (95% CI)</td><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">Age</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003C;2 years</td><td align="left" valign="top">0.84 (0.80&#x2010;0.88)</td><td align="left" valign="top">0.74</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2&#x2010;5 years</td><td align="left" valign="top">0.85 (0.82&#x2010;0.89)</td><td align="left" valign="top">0.75</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003E;5 years</td><td align="left" valign="top">0.86 (0.83&#x2010;0.89)</td><td align="left" valign="top">0.76</td></tr><tr><td align="left" valign="top" colspan="3">Sex</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">0.85 (0.82&#x2010;0.88)</td><td align="left" valign="top">0.75</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">0.84 (0.81&#x2010;0.87)</td><td 
align="left" valign="top">0.74</td></tr><tr><td align="left" valign="top" colspan="3">Socioeconomic status</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Low SES<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td><td align="left" valign="top">0.83 (0.79&#x2010;0.87)</td><td align="left" valign="top">0.73</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Medium SES</td><td align="left" valign="top">0.85 (0.82&#x2010;0.88)</td><td align="left" valign="top">0.75</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High SES</td><td align="left" valign="top">0.86 (0.83&#x2010;0.89)</td><td align="left" valign="top">0.76</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>AUC-ROC: area under the receiver operating characteristic curve.</p></fn><fn id="table5fn2"><p><sup>b</sup>SES: socioeconomic status.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Conclusions</title><p>This study successfully designed and validated a novel AI-driven framework for comprehensive pediatric health risk stratification. The primary findings were threefold. First, the framework demonstrates the capability to effectively integrate heterogeneous, multimodal data sources to create a holistic view of a child&#x2019;s health status. Second, our proposed predictive model, which combines a fine-tuned BERT architecture with an ensemble learning strategy, significantly outperformed established baseline models, such as logistic regression and SVM, in predicting key health risks, achieving an AUC-ROC of 0.85 for early childhood obesity. 
Third, the prototype system&#x2019;s risk assessments showed substantial agreement with manual expert evaluations (78% accuracy), confirming its potential clinical utility and feasibility. The successful implementation of the prototype system, featuring intuitive dashboards for both clinicians and parents, further illustrates the practical applicability of this approach in facilitating early and personalized interventions, thereby contributing a novel and robust technological foundation for proactive pediatric health care.</p></sec><sec id="s4-2"><title>Limitations and Future Work</title><p>While the presented framework shows considerable promise, certain limitations and avenues for future research warrant discussion. The current validation, though rigorous, was based on a specific dataset of over 40,000 pediatric participants aged 2&#x2010;8 years. Broader validation across more diverse pediatric populations and longitudinal follow-up are essential to ascertain long-term predictive accuracy and generalizability. One limitation of our study is the observed slight dip in model performance for the subgroup of children aged 3&#x2010;5 years, which we attribute to the relative scarcity of rich, unstructured text data for this age cohort. To mitigate this issue in future work, we plan to incorporate alternative text sources. A promising approach is the inclusion of open-ended responses from parental questionnaires. These narratives, which capture detailed parental concerns and observations, can be processed by our fine-tuned BERT model to generate rich semantic features, thereby enriching the feature set for younger children and addressing the data sparsity issue. A crucial direction for future work is the implementation of advanced bias mitigation techniques. 
We plan to further explore methods, such as adversarial debiasing, which involves training the model to make predictions that are invariant to sensitive attributes, thereby proactively enhancing the fairness and equity of our risk stratification framework.</p></sec></sec></body><back><notes><sec><title>Funding</title><p>The research project was supported by the Education Department of Hunan Province: Multidimensional Risk Assessment and Sports-Health Integrated Intervention Model Development for Postural Health in Children in Northwestern Hunan (project number: 24B0629), and the Aid Program for Science and Technology Innovative Research Team in Higher Educational Institutions of Hunan Province.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">API</term><def><p>application programming interface</p></def></def-item><def-item><term id="abb3">AUC-ROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb4">BERT</term><def><p>Bidirectional Encoder Representations From Transformers</p></def></def-item><def-item><term id="abb5">CPHVI</term><def><p>composite pediatric health vulnerability index</p></def></def-item><def-item><term id="abb6">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb7">GDPR</term><def><p>General Data Protection Regulation</p></def></def-item><def-item><term id="abb8">HIPAA</term><def><p>Health Insurance Portability and Accountability Act</p></def></def-item><def-item><term id="abb9">MICE</term><def><p>multivariate imputation by chained equations</p></def></def-item><def-item><term id="abb10">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb11">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term 
id="abb12">SHAP</term><def><p>Shapley additive explanations</p></def></def-item><def-item><term id="abb13">SVM</term><def><p>support vector machine</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sales</surname><given-names>WB</given-names> </name><name name-style="western"><surname>Maranh&#x00E3;o</surname><given-names>EF</given-names> </name><name name-style="western"><surname>Ramalho</surname><given-names>CST</given-names> </name><name name-style="western"><surname>Mac&#x00EA;do</surname><given-names>S</given-names> </name><name name-style="western"><surname>Souza</surname><given-names>GF</given-names> </name><name name-style="western"><surname>Maciel</surname><given-names>&#x00C1;CC</given-names> </name></person-group><article-title>Early life circumstances and their impact on health in adulthood and later life: a systematic review</article-title><source>BMC Geriatr</source><year>2024</year><month>11</month><day>28</day><volume>24</volume><issue>1</issue><fpage>978</fpage><pub-id pub-id-type="doi">10.1186/s12877-024-05571-4</pub-id><pub-id pub-id-type="medline">39609801</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wadsworth</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Kuh</surname><given-names>DJ</given-names> </name></person-group><article-title>Childhood influences on adult health: a review of recent work from the British 1946 national birth cohort study, the MRC National Survey of Health and Development</article-title><source>Paediatr Perinat Epidemiol</source><year>1997</year><month>01</month><volume>11</volume><issue>1</issue><fpage>2</fpage><lpage>20</lpage><pub-id 
pub-id-type="doi">10.1046/j.1365-3016.1997.d01-7.x</pub-id><pub-id pub-id-type="medline">9018723</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Braveman</surname><given-names>P</given-names> </name><name name-style="western"><surname>Barclay</surname><given-names>C</given-names> </name></person-group><article-title>Health disparities beginning in childhood: a life-course perspective</article-title><source>Pediatrics</source><year>2009</year><month>11</month><volume>124 Suppl 3</volume><fpage>S163</fpage><lpage>75</lpage><pub-id pub-id-type="doi">10.1542/peds.2009-1100D</pub-id><pub-id pub-id-type="medline">19861467</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Noteboom</surname><given-names>A</given-names> </name><name name-style="western"><surname>Have</surname><given-names>MT</given-names> </name><name name-style="western"><surname>de Graaf</surname><given-names>R</given-names> </name><name name-style="western"><surname>Beekman</surname><given-names>ATF</given-names> </name><name name-style="western"><surname>Penninx</surname><given-names>B</given-names> </name><name name-style="western"><surname>Lamers</surname><given-names>F</given-names> </name></person-group><article-title>The long-lasting impact of childhood trauma on adult chronic physical disorders</article-title><source>J Psychiatr Res</source><year>2021</year><month>04</month><volume>136</volume><fpage>87</fpage><lpage>94</lpage><pub-id pub-id-type="doi">10.1016/j.jpsychires.2021.01.031</pub-id><pub-id pub-id-type="medline">33581460</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eadie</surname><given-names>P</given-names> 
</name><name name-style="western"><surname>Levickis</surname><given-names>P</given-names> </name><name name-style="western"><surname>McKean</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Developing preschool language surveillance models - cumulative and clustering patterns of early life factors in the early language in Victoria study cohort</article-title><source>Front Pediatr</source><year>2022</year><volume>10</volume><fpage>826817</fpage><pub-id pub-id-type="doi">10.3389/fped.2022.826817</pub-id><pub-id pub-id-type="medline">35186809</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Olsen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Pedersen</surname><given-names>LH</given-names> </name></person-group><article-title>Reproductive health indicators and fetal medicine - many things will change</article-title><source>Scand J Work Environ Health</source><year>2016</year><month>06</month><day>1</day><volume>42</volume><issue>6</issue><fpage>561</fpage><lpage>562</lpage><pub-id pub-id-type="doi">10.5271/sjweh.3596</pub-id><pub-id pub-id-type="medline">27685865</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Young</surname><given-names>C</given-names> </name><name name-style="western"><surname>Grobelna</surname><given-names>A</given-names> </name></person-group><source>Flash Glucose Monitoring Systems in Pediatric Populations With Diabetes</source><year>2021</year><access-date>2026-01-02</access-date><publisher-name>Canadian Agency for Drugs and Technologies in Health</publisher-name><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK572013/">https://www.ncbi.nlm.nih.gov/books/NBK572013/</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garbarino</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bragazzi</surname><given-names>NL</given-names> </name></person-group><article-title>Revolutionizing sleep health: the emergence and impact of personalized sleep medicine</article-title><source>J Pers Med</source><year>2024</year><month>06</month><day>4</day><volume>14</volume><issue>6</issue><fpage>598</fpage><pub-id pub-id-type="doi">10.3390/jpm14060598</pub-id><pub-id pub-id-type="medline">38929819</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stidley</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>VO</given-names> </name><name name-style="western"><surname>Narva</surname><given-names>AS</given-names> </name><etal/></person-group><article-title>A population-based, cross-sectional survey of the Zuni Pueblo: a collaborative approach to an epidemic of kidney disease</article-title><source>Am J Kidney Dis</source><year>2002</year><month>02</month><volume>39</volume><issue>2</issue><fpage>358</fpage><lpage>368</lpage><pub-id pub-id-type="doi">10.1053/ajkd.2002.30557</pub-id><pub-id pub-id-type="medline">11840378</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vittoria Togo</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mastrolorito</surname><given-names>F</given-names> </name><name 
name-style="western"><surname>Orfino</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Where developmental toxicity meets explainable artificial intelligence: state-of-the-art and perspectives</article-title><source>Expert Opin Drug Metab Toxicol</source><year>2024</year><month>07</month><day>2</day><volume>20</volume><issue>7</issue><fpage>561</fpage><lpage>577</lpage><pub-id pub-id-type="doi">10.1080/17425255.2023.2298827</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>O</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name></person-group><article-title>A deep-learning approach to predict reproductive toxicity of chemicals using communicative message passing neural network</article-title><source>Front Toxicol</source><year>2025</year><volume>7</volume><fpage>1640612</fpage><pub-id pub-id-type="doi">10.3389/ftox.2025.1640612</pub-id><pub-id pub-id-type="medline">40765625</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sambanis</surname><given-names>A</given-names> </name><name name-style="western"><surname>Osiecki</surname><given-names>K</given-names> </name><name name-style="western"><surname>Cailas</surname><given-names>M</given-names> </name><name name-style="western"><surname>Quinsey</surname><given-names>L</given-names> </name><name name-style="western"><surname>Jacobs</surname><given-names>DE</given-names> </name></person-group><article-title>Using artificial intelligence to identify sources and pathways of lead exposure in children</article-title><source>J Public Health Manag 
Pract</source><year>2023</year><volume>29</volume><issue>5</issue><fpage>E208</fpage><lpage>E213</lpage><pub-id pub-id-type="doi">10.1097/PHH.0000000000001759</pub-id><pub-id pub-id-type="medline">37129378</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>YD</given-names> </name><name name-style="western"><surname>Zeng</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Parents&#x2019; understanding and attitudes toward the application of AI in pediatric healthcare: a cross-sectional survey study</article-title><source>Front Public Health</source><year>2025</year><volume>13</volume><fpage>1654482</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2025.1654482</pub-id><pub-id pub-id-type="medline">40910043</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sezgin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Jackson</surname><given-names>DI</given-names> </name><name name-style="western"><surname>Kocaballi</surname><given-names>AB</given-names> </name><etal/></person-group><article-title>Can large language models aid caregivers of pediatric cancer patients in information seeking? 
A cross-sectional investigation</article-title><source>Cancer Med</source><year>2025</year><month>01</month><volume>14</volume><issue>1</issue><fpage>e70554</fpage><pub-id pub-id-type="doi">10.1002/cam4.70554</pub-id><pub-id pub-id-type="medline">39776222</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shah</surname><given-names>U</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Alzubaidi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Agus</surname><given-names>M</given-names> </name><name name-style="western"><surname>Househ</surname><given-names>M</given-names> </name></person-group><article-title>ArtInsight: a multimodal AI framework for interpreting children&#x2019;s drawings and enhancing emotional understanding</article-title><source>Stud Health Technol Inform</source><year>2025</year><month>05</month><day>15</day><volume>327</volume><issue>5</issue><fpage>808</fpage><lpage>812</lpage><pub-id pub-id-type="doi">10.3233/SHTI250471</pub-id><pub-id pub-id-type="medline">40380579</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Till</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Briganti</surname><given-names>G</given-names> </name></person-group><article-title>Outcomes in machine learning models for child psychiatry: a systematic review of the literature</article-title><source>Psychiatr Danub</source><year>2025</year><month>09</month><volume>37</volume><issue>Suppl 1</issue><fpage>79</fpage><lpage>84</lpage><pub-id pub-id-type="medline">40982877</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vandewouw</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Syed</surname><given-names>B</given-names> </name><name name-style="western"><surname>Barnett</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Using deep learning to predict internalizing problems from brain structure in youth</article-title><source>Transl Psychiatry</source><year>2025</year><month>08</month><day>29</day><volume>15</volume><issue>1</issue><fpage>326</fpage><pub-id pub-id-type="doi">10.1038/s41398-025-03565-3</pub-id><pub-id pub-id-type="medline">40883286</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Till</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Briganti</surname><given-names>G</given-names> </name></person-group><article-title>AI in child psychiatry: exploring future tools for the detection and management of mental disorders in children and adolescents</article-title><source>Psychiatr Danub</source><year>2023</year><month>10</month><volume>35</volume><issue>Suppl 2</issue><fpage>20</fpage><lpage>25</lpage><pub-id pub-id-type="medline">37800200</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Usability and effectiveness of eHealth and mHealth interventions that support self-management and health care transition in adolescents and young adults with chronic disease: systematic 
review</article-title><source>J Med Internet Res</source><year>2024</year><month>11</month><day>26</day><volume>26</volume><fpage>e56556</fpage><pub-id pub-id-type="doi">10.2196/56556</pub-id><pub-id pub-id-type="medline">39589770</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>J</given-names> </name></person-group><article-title>Enhancing rural healthcare through internet-based remote collaborative outpatient services: a comprehensive evaluation in Changzhi, Shanxi Province</article-title><source>Medicine (Baltimore)</source><year>2024</year><volume>103</volume><issue>36</issue><fpage>e39614</fpage><pub-id pub-id-type="doi">10.1097/MD.0000000000039614</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Song</surname><given-names>T</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Efficacy of an mHealth app to support patients&#x2019; self-management of hypertension: randomized controlled trial</article-title><source>J Med Internet Res</source><year>2023</year><month>12</month><day>19</day><volume>25</volume><fpage>e43809</fpage><pub-id pub-id-type="doi">10.2196/43809</pub-id><pub-id pub-id-type="medline">38113071</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hussain</surname><given-names>SA</given-names> </name><name 
name-style="western"><surname>Sezgin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Krivchenia</surname><given-names>K</given-names> </name><name name-style="western"><surname>Luna</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rust</surname><given-names>S</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name></person-group><article-title>A natural language processing pipeline to synthesize patient-generated notes toward improving remote care and chronic disease management: a cystic fibrosis case study</article-title><source>JAMIA Open</source><year>2021</year><month>07</month><volume>4</volume><issue>3</issue><fpage>ooab084</fpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooab084</pub-id><pub-id pub-id-type="medline">34604710</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zayas-Cab&#x00E1;n</surname><given-names>T</given-names> </name><name name-style="western"><surname>Haque</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Kemper</surname><given-names>N</given-names> </name></person-group><article-title>Identifying opportunities for workflow automation in health care: lessons learned from other industries</article-title><source>Appl Clin Inform</source><year>2021</year><month>05</month><volume>12</volume><issue>3</issue><fpage>686</fpage><lpage>697</lpage><pub-id pub-id-type="doi">10.1055/s-0041-1731744</pub-id><pub-id pub-id-type="medline">34320683</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x0160;uster</surname><given-names>S</given-names> </name><name name-style="western"><surname>Baldwin</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>Lau</surname><given-names>JH</given-names> </name><etal/></person-group><article-title>Automating quality assessment of medical evidence in systematic reviews: model development and validation study</article-title><source>J Med Internet Res</source><year>2023</year><month>03</month><day>13</day><volume>25</volume><fpage>e35568</fpage><pub-id pub-id-type="doi">10.2196/35568</pub-id><pub-id pub-id-type="medline">36722350</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abbasi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khera</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dabravolskaj</surname><given-names>J</given-names> </name><name name-style="western"><surname>Aissiou</surname><given-names>A</given-names> </name><name name-style="western"><surname>Abbasi-Dezfouly</surname><given-names>R</given-names> </name></person-group><article-title>Family physicians&#x2019; feedback on the feature design of a digital health platform to streamline the care of older adults</article-title><source>Geriatrics (Basel)</source><year>2024</year><month>11</month><day>28</day><volume>9</volume><issue>6</issue><fpage>154</fpage><pub-id pub-id-type="doi">10.3390/geriatrics9060154</pub-id><pub-id pub-id-type="medline">39727813</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Smith</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cheater</surname><given-names>F</given-names> </name><name name-style="western"><surname>Bekker</surname><given-names>H</given-names> </name></person-group><article-title>Parents&#x2019; experiences of living with a child with a long-term condition: a rapid structured review of the 
literature</article-title><source>Health Expect</source><year>2015</year><month>08</month><volume>18</volume><issue>4</issue><fpage>452</fpage><lpage>474</lpage><pub-id pub-id-type="doi">10.1111/hex.12040</pub-id><pub-id pub-id-type="medline">23311692</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>CP</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Govindan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>A</given-names> </name></person-group><article-title>Artificial intelligence-based human-centric decision support framework: an application to predictive maintenance in asset management under pandemic environments</article-title><source>Ann Oper Res</source><year>2025</year><month>07</month><volume>350</volume><issue>2</issue><fpage>493</fpage><lpage>516</lpage><pub-id pub-id-type="doi">10.1007/s10479-021-04373-w</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>JG</given-names> </name><name name-style="western"><surname>Jun</surname><given-names>S</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>YW</given-names> </name><etal/></person-group><article-title>Deep learning in medical imaging: general overview</article-title><source>Korean J Radiol</source><year>2017</year><volume>18</volume><issue>4</issue><fpage>570</fpage><lpage>584</lpage><pub-id pub-id-type="doi">10.3348/kjr.2017.18.4.570</pub-id><pub-id pub-id-type="medline">28670152</pub-id></nlm-citation></ref><ref 
id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kalla</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bradford</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schadewaldt</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Co-designing a user-centered digital health tool for supportive care needs of patients with brain tumors and their caregivers: interview analysis</article-title><source>JMIR Cancer</source><year>2025</year><month>05</month><day>23</day><volume>11</volume><fpage>e53690</fpage><pub-id pub-id-type="doi">10.2196/53690</pub-id><pub-id pub-id-type="medline">40408266</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kowalkowski</surname><given-names>M</given-names> </name><name name-style="western"><surname>Eaton</surname><given-names>T</given-names> </name><name name-style="western"><surname>Reeves</surname><given-names>KW</given-names> </name><etal/></person-group><article-title>Incorporating patient, caregiver, and provider perspectives in the co-design of an app to guide hospital at home admission decisions: a qualitative analysis</article-title><source>JAMIA Open</source><year>2024</year><month>10</month><volume>7</volume><issue>3</issue><fpage>ooae079</fpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooae079</pub-id><pub-id pub-id-type="medline">39156047</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ibrahim</surname><given-names>ST</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Katapally</surname><given-names>TR</given-names> </name></person-group><article-title>Digital citizen science for ethical monitoring of youth physical activity frequency: comparing mobile ecological prospective assessments and retrospective recall</article-title><source>PLOS Digit Health</source><year>2025</year><month>05</month><volume>4</volume><issue>5</issue><fpage>e0000840</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000840</pub-id><pub-id pub-id-type="medline">40315264</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sumiya</surname><given-names>M</given-names> </name><name name-style="western"><surname>Nishimura</surname><given-names>T</given-names> </name><name name-style="western"><surname>Aizaki</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Addressing the ethical, legal, and social issues of healthtech in education: insights from Japan</article-title><source>JMIR Form Res</source><year>2025</year><month>07</month><day>18</day><volume>9</volume><fpage>e72781</fpage><pub-id pub-id-type="doi">10.2196/72781</pub-id><pub-id pub-id-type="medline">40680270</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pavarini</surname><given-names>G</given-names> </name><name name-style="western"><surname>Yosifova</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Data sharing in the age of predictive psychiatry: an adolescent perspective</article-title><source>Evid Based Ment Health</source><year>2022</year><month>05</month><volume>25</volume><issue>2</issue><fpage>69</fpage><lpage>76</lpage><pub-id 
pub-id-type="doi">10.1136/ebmental-2021-300329</pub-id><pub-id pub-id-type="medline">35346984</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lounsbury</surname><given-names>O</given-names> </name><name name-style="western"><surname>Clarke</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ashrafian</surname><given-names>H</given-names> </name><name name-style="western"><surname>Darzi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Neves</surname><given-names>AL</given-names> </name></person-group><article-title>Patient and caregiver perceptions of electronic health records interoperability in the NHS and its impact on care quality: a focus group study</article-title><source>BMC Med Inform Decis Mak</source><year>2024</year><month>12</month><day>3</day><volume>24</volume><issue>1</issue><fpage>370</fpage><pub-id pub-id-type="doi">10.1186/s12911-024-02789-5</pub-id><pub-id pub-id-type="medline">39627780</pub-id></nlm-citation></ref></ref-list></back></article>