<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e66905</article-id><article-id pub-id-type="doi">10.2196/66905</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Using Large Language Models for Chronic Disease Management Tasks: Scoping Review</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Serugunda</surname><given-names>Henry Mukalazi</given-names></name><degrees>MSc, MBA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Jianquan</surname><given-names>Ouyang</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kasujja Namatovu</surname><given-names>Hasifah</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ssemaluulu</surname><given-names>Paul</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kimbugwe</surname><given-names>Nasser</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Garimoi Orach</surname><given-names>Christopher</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Waiswa</surname><given-names>Peter</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Information Technology, School of Computing and Informatics Technology, Makerere University</institution><addr-line>Kampala</addr-line><country>Uganda</country></aff><aff id="aff2"><institution>School of Computer Science and School of Cyberspace, Xiangtan University</institution><addr-line>Engineering Building, 2nd Floor</addr-line><addr-line>Yuhu District, Xiangtan</addr-line><addr-line>Hunan</addr-line><country>China</country></aff><aff id="aff3"><institution>Department of Information Systems, School of Computing and Informatics Technology, Makerere University</institution><addr-line>Kampala</addr-line><country>Uganda</country></aff><aff id="aff4"><institution>Department of Computer Science, Faculty of Computing and Library Science, Kabale University</institution><addr-line>Kabale</addr-line><country>Uganda</country></aff><aff id="aff5"><institution>Department of Networks, School of Computing and Informatics Technology, Makerere University</institution><addr-line>Kampala</addr-line><country>Uganda</country></aff><aff id="aff6"><institution>Department of Community Health and Behavioral Sciences, School of Public Health, College of Health Sciences, Makerere University</institution><addr-line>Kampala</addr-line><country>Uganda</country></aff><aff id="aff7"><institution>Department of Health 
Policy Planning and Management, School of Public Health, College of Health Sciences, Makerere University</institution><addr-line>Kampala</addr-line><country>Uganda</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Castonguay</surname><given-names>Alexandre</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Jafarizadeh</surname><given-names>Ali</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Arasteh</surname><given-names>Soroosh Tayebi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Ouyang Jianquan, PhD, School of Computer Science and School of Cyberspace, Xiangtan University, Engineering Building, 2nd Floor, Yuhu District, Xiangtan, Hunan, 411105, China, 86 73158292718 ext 186; <email>oyjq@xtu.edu.cn</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>29</day><month>9</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e66905</elocation-id><history><date date-type="received"><day>26</day><month>09</month><year>2024</year></date><date date-type="rev-recd"><day>06</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>23</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9;Henry Mukalazi Serugunda, Ouyang Jianquan, Hasifah Kasujja Namatovu, Paul Ssemaluulu, Nasser Kimbugwe, Christopher Garimoi Orach, Peter Waiswa. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 29.9.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e66905"/><abstract><sec><title>Background</title><p>Chronic diseases present significant challenges in health care, requiring effective management to reduce morbidity and mortality. While digital technologies like wearable devices and mobile applications have been widely adopted, large language models (LLMs) such as ChatGPT are emerging as promising technologies with the potential to enhance chronic disease management. However, the scope of their current applications in chronic disease management and associated challenges remains underexplored.</p></sec><sec><title>Objective</title><p>This scoping review investigates LLM applications in chronic disease management, identifies challenges, and proposes actionable recommendations.</p></sec><sec sec-type="methods"><title>Methods</title><p>A systematic search for English-language primary studies on LLM use in chronic disease management was conducted across PubMed, IEEE Xplore, Scopus, and Google Scholar to identify articles published between January 1, 2023, and January 15, 2025. 
Of the 605 records identified, 29 studies met the inclusion criteria. Data on study objectives, LLMs used, health care settings, study designs, users, disease management tasks, and challenges were extracted and thematically analyzed using the Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews guidelines.</p></sec><sec sec-type="results"><title>Results</title><p>LLMs were primarily used for patient-centered tasks, including patient education and information provision (18/29, 62% of studies), diagnosis and treatment (6/29, 21%), self-management and disease monitoring (8/29, 28%), and emotional support and therapeutic conversations (4/29, 14%). Practitioner-centered tasks included clinical decision support (8/29, 28%) and medical predictions (6/29, 21%). Challenges identified include inaccurate and inconsistent LLM responses (18/29, 62%), limited datasets (6/29, 21%), computational and technical issues (6/29, 21%), usability and accessibility issues (9/29, 31%), LLM evaluation issues (5/29, 17%), and legal, ethical, privacy, and regulatory concerns (10/29, 35%). While models like ChatGPT, Llama, and Bard demonstrated use in diabetes management and mental health support, performance issues were evident across studies and use cases.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>LLMs show promising potential for enhancing chronic disease management across patient- and practitioner-centered tasks. However, challenges related to accuracy, data scarcity, usability, and ethical concerns must be addressed to ensure patient safety and equitable use. 
Future studies should prioritize the integration of LLMs with low-resource platforms, wearable and mobile technologies, developing culturally and age-appropriate interfaces, and establishing robust regulatory and evaluation frameworks to support safe, effective, and inclusive use in health care.</p></sec></abstract><kwd-group><kwd>chronic diseases</kwd><kwd>disease management</kwd><kwd>artificial intelligence in health care</kwd><kwd>large language models</kwd><kwd>natural language processing</kwd><kwd>NLP</kwd><kwd>generative pre-trained transformer</kwd><kwd>GPT</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Chronic diseases, such as diabetes, heart disease, asthma, lung disease, depression, hypertension, Alzheimer disorder, and cancer, are a significant global burden on health care systems [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. These conditions often lead to long-term health issues and have profound physical, psychological, and social impacts on patients [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Chronic diseases demand continuous, personalized care, often resource-intensive and difficult to scale [<xref ref-type="bibr" rid="ref1">1</xref>]. Therefore, disease management, which encompasses screenings, regular check-ups, monitoring, coordination of treatment, medication adherence, lifestyle modifications, and patient education, is crucial for improving patient outcomes, enhancing quality of life, and reducing the overall burden on health care systems [<xref ref-type="bibr" rid="ref1">1</xref>]. 
However, the resource-intensive nature of personalized, continuous care often makes it inaccessible to many patients, particularly in underserved populations where limited access to health care professionals and resources creates significant barriers to effective disease management [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>In recent years, the use of digital technologies such as wearable devices [<xref ref-type="bibr" rid="ref4">4</xref>], mobile apps [<xref ref-type="bibr" rid="ref5">5</xref>], and chatbots [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] has grown significantly in the management of chronic diseases. These have mainly been used for health care tasks, including patient education, symptom monitoring, and medication management [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. The recent emergence of large language models (LLMs) such as GPT, Palm, Llama, and LaMDA [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>] has demonstrated growing potential in health care applications. These models have been applied in health care for tasks such as personalized treatment recommendations, medical diagnosis, medical record summarization, and interpretation of clinical data to support clinical decision-making and disease management [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>Chronic disease management requires continuous monitoring, patient education, treatment coordination, and personalized care strategies [<xref ref-type="bibr" rid="ref1">1</xref>]. Recent advancements in LLMs have introduced new possibilities for improving these tasks. For instance, ChatGPT has been explored in providing personalized health advice, enhancing patient engagement, and supporting symptom monitoring [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. 
In diabetes management, GPT-based models have been investigated for interpreting continuous glucose monitoring data, providing personalized lifestyle recommendations [<xref ref-type="bibr" rid="ref17">17</xref>], and assessing individualized risk profiles for complications such as retinopathy [<xref ref-type="bibr" rid="ref17">17</xref>]. Beyond diabetes, LLMs such as Llama and GPT have been investigated for mental health support [<xref ref-type="bibr" rid="ref18">18</xref>], blood pressure measurement using wearable bio signals [<xref ref-type="bibr" rid="ref19">19</xref>], management of sickle cell anemia [<xref ref-type="bibr" rid="ref20">20</xref>], and dissemination of information on inflammatory bowel diseases to patients and health care professionals [<xref ref-type="bibr" rid="ref21">21</xref>].</p><p>Despite these applications, several challenges affect the effectiveness of LLMs in chronic disease management, including inaccurate responses, limited and biased datasets, and ethical concerns [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. These issues raise concerns regarding the accuracy, reliability, and clinical applicability of LLM-generated recommendations [<xref ref-type="bibr" rid="ref24">24</xref>]. Given these challenges, a comprehensive review is essential to assess the current applications, identify key limitations, and propose strategies to enhance the effectiveness and safety of LLMs in chronic disease care. While existing reviews explore LLMs in general health care, this scoping review uniquely focuses on their role in chronic disease management. It synthesizes evidence across patient- and practitioner-centered applications, examines domain-specific challenges, and provides actionable recommendations. 
Specifically, this review aims to evaluate the current applications of LLMs in chronic disease management tasks, identify key challenges, and provide actionable recommendations to address identified challenges.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Search Strategy and Information Sources</title><p>This scoping review explored the use of LLMs in chronic disease management following the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) guidelines [<xref ref-type="bibr" rid="ref25">25</xref>]. A comprehensive search was conducted in PubMed, Scopus, IEEE Xplore, and Google Scholar, selected for their coverage of peer-reviewed medical, technical, and AI-related research. Google Scholar was included to capture a broad range of academic publications, including preprints and conference papers that may not be covered by traditional databases. Search terms included combinations of &#x201C;Large language models,&#x201D; &#x201C;LLMs,&#x201D; &#x201C;ChatGPT,&#x201D; &#x201C;chronic diseases,&#x201D; and &#x201C;chronic disease management.&#x201D; The search targeted both published and unpublished English-language articles from January 1, 2023, to January 15, 2025, ensuring a focus on recent advancements in LLM applications in health care.</p></sec><sec id="s2-2"><title>Article Selection</title><p>Studies were included if they focused on applications of LLMs in chronic disease management, provided full-text access, were published in English, and appeared between January 1, 2023, and January 15, 2025. Exclusion criteria eliminated nonprimary research (reviews, editorials, viewpoints, and commentaries), abstracts without full text, non-English publications, and articles outside the date range. To capture emerging research and potentially relevant studies, reputable non-peer&#x2013;reviewed preprints from established repositories such as arXiv and medRxiv were included. 
<xref ref-type="fig" rid="figure1">Figure 1</xref> illustrates the eligibility screening process with a decision tree.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Decision tree for assessing article eligibility. The exclusion of certain publication types was necessary to ensure the review focused on primary research and empirical studies that directly address the application of large language models in chronic disease management.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66905_fig01.png"/></fig></sec><sec id="s2-3"><title>Data Extraction and Synthesis</title><p>The data from selected studies were extracted into a structured form that captured study objectives, the LLMs used, health care settings, study designs, disease management tasks, identified challenges, evaluation methods, and target users. These extracted data points were selected to ensure a structured and objective analysis aligned with the study&#x2019;s aims. Study objectives provided insight into the intended applications of LLMs in chronic disease management, while health care settings contextualized their use across clinical and patient-centered environments. Study design and evaluation methods were included to assess methodological rigor and the validity of findings. In addition, data on LLM models, disease management tasks, and key challenges enabled a systematic evaluation of current applications, limitations, and areas for future research.</p><p>The Mixed Methods Appraisal Tool (MMAT, version 2018) [<xref ref-type="bibr" rid="ref26">26</xref>] was used to perform a formal methodological quality assessment. The MMAT was selected due to its flexibility in evaluating diverse study designs included in the review. 
Two reviewers independently appraised each study using the 5 criteria relevant to its methodological design, with discrepancies resolved through discussion until consensus was reached on final ratings. Consistent with scoping review methodology, studies were not excluded based on quality, but the appraisal results informed the interpretation of findings [<xref ref-type="bibr" rid="ref25">25</xref>]. The findings from the included studies were synthesized and presented in alignment with the study objectives. A thematic analysis approach was used to categorize qualitative insights, grouping findings into patient-centered tasks, practitioner-centered tasks, and challenges. Discrepancies in data interpretation were resolved through consensus among the reviewers. Reference management and citation generation were conducted using Mendeley.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Included Studies</title><p>The PRISMA flow diagram (<xref ref-type="fig" rid="figure2">Figure 2</xref>) outlines each stage of the study selection process. The PRISMA-ScR framework was followed to ensure transparency and reproducibility, incorporating detailed search strategies and clearly defined exclusion criteria. A total of 446 records were identified from Google Scholar, 75 from Scopus, 56 from PubMed, and 28 from IEEE Xplore. After removing duplicates, 242 unique records underwent title and abstract screening, resulting in 61 articles for full-text review. 
Following the application of eligibility criteria, 29 articles were included in the final analysis.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flowchart.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66905_fig02.png"/></fig></sec><sec id="s3-2"><title>Characteristics of Included Articles</title><p><xref ref-type="table" rid="table1">Table 1</xref> provides the characteristics of the 29 articles included in this scoping review. The articles were categorized based on publication type, with the majority of the studies being journal articles (n=18), followed by conference papers (n=8), and preprints (n=3). The studies used a variety of research designs, including experimental (n=10), qualitative research (n=3), comparative studies (n=4), cross-sectional study (n=2), case study (n=1), observational (n=3), retrospective cohort designs (n=3), mixed methods (n=1), pilot study (n=1), and prospective study (n=1). 
<xref ref-type="table" rid="table1">Table 1</xref> provides more details on the characteristics of the reviewed studies.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Study characteristics (n=29).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Study</td><td align="left" valign="bottom">Country</td><td align="left" valign="bottom">Article category</td><td align="left" valign="bottom">Study objective</td><td align="left" valign="bottom">Health care setting</td><td align="left" valign="bottom">Study design</td><td align="left" valign="bottom">Evaluation</td></tr></thead><tbody><tr><td align="left" valign="top">Montagna et al [<xref ref-type="bibr" rid="ref6">6</xref>]</td><td align="left" valign="top">Italy</td><td align="left" valign="top">Conference paper</td><td align="left" valign="top">To design and implement a system architecture for a chatbot-based home blood pressure monitoring solution.</td><td align="left" valign="top">Home care setting</td><td align="left" valign="top">Experimental (system design and prototype development)</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Yang et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">China</td><td align="left" valign="top">Preprint</td><td align="left" valign="top">To explore the application of a fine-tuned model-based outpatient treatment support system for treating patients with diabetes and to evaluate its effectiveness and potential value.</td><td align="left" valign="top">Clinical (West China Hospital)<break/>home care</td><td align="left" valign="top">Experimental (fine-tuning)</td><td align="left" valign="top">Human evaluation<break/>and automated evaluation metrics</td></tr><tr><td align="left" valign="top">Raghu et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">India</td><td align="left" 
valign="top">Journal<break/>article</td><td align="left" valign="top">To evaluate the ability of ChatGPT to predict the diabetic retinopathy risk.</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">Comparative study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Song et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">The Republic of Korea</td><td align="left" valign="top">Preprint</td><td align="left" valign="top">To investigate the experiences of individuals using LLM<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> chatbots for mental health support.</td><td align="left" valign="top">Korea Advanced Institute of Science and Technology</td><td align="left" valign="top">Qualitative study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Liu et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">China</td><td align="left" valign="top">Conference paper</td><td align="left" valign="top">Explore the use of LLMs for cuffless blood pressure measurement using wearable bio signals</td><td align="left" valign="top">Home care settings</td><td align="left" valign="top">Experimental (cuffless blood pressure measurement using LLMs)</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Ogundare et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">Nigeria</td><td align="left" valign="top">Conference paper</td><td align="left" valign="top">To investigate the potential of LLMs in ambulatory devices for sickle cell anemia management.</td><td align="left" valign="top">Home care setting</td><td align="left" valign="top">Case study</td><td align="left" valign="top">Automated evaluation metrics</td></tr><tr><td align="left" valign="top">Cankurtaran et al [<xref 
ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">Turkey</td><td align="left" valign="top">Journal<break/>article</td><td align="left" valign="top">To evaluate the performance of ChatGPT within the context of inflammatory bowel disease.</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Cross-sectional study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Wang et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">China</td><td align="left" valign="top">Conference paper</td><td align="left" valign="top">To enhance the diagnosis and treatment of depression.</td><td align="left" valign="top">Clinical and home care settings</td><td align="left" valign="top">Experimental<break/>(pre-training and fine-tuning)</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Abdullahi et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">Germany</td><td align="left" valign="top">Journal<break/>article</td><td align="left" valign="top">To explore the potential of three popular large language models in medical education to enhance the diagnosis of rare and complex diseases.</td><td align="left" valign="top">Home care setting</td><td align="left" valign="top">Qualitative study</td><td align="left" valign="top">Human evaluation<break/>and automated evaluation metrics</td></tr><tr><td align="left" valign="top">Al Anezi [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">Saudi Arabia</td><td align="left" valign="top">Journal<break/>article</td><td align="left" valign="top">To analyze the use of ChatGPT as a virtual health coach for chronic disease management.</td><td align="left" valign="top">Home care setting</td><td align="left" valign="top">Quasi-experimental design</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" 
valign="top">Athavale et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">United States</td><td align="left" valign="top">Journal<break/>article</td><td align="left" valign="top">To assess whether chatbots could assist with answering patient questions and electronic health record inbox management</td><td align="left" valign="top">Clinical (Division of Vascular Surgery, Stanford University School of Medicine in Palo Alto)</td><td align="left" valign="top">Experimental<break/>(chatbot assistance in chronic venous disease management)</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Soto-Ch&#x00E1;vez et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Colombia</td><td align="left" valign="top">Journal<break/>article</td><td align="left" valign="top">To evaluate the reliability and readability of Spanish chronic disease information presented to ChatGPT</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Cross-sectional study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Abbas et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">Pakistan</td><td align="left" valign="top">Journal<break/>article</td><td align="left" valign="top">To assess the predictive accuracy of ChatGPT-assisted machine learning models for various chronic diseases.</td><td align="left" valign="top">Clinical (Tertiary hospital)</td><td align="left" valign="top">Observational study</td><td align="left" valign="top">Automated evaluation metrics</td></tr><tr><td align="left" valign="top">Anderson et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">United States</td><td align="left" valign="top">Conference paper</td><td align="left" valign="top">The study aims to discover and rank novel relationships between various aspects of this condition.</td><td 
align="left" valign="top">&#x2014;</td><td align="left" valign="top">Experimental</td><td align="left" valign="top">Human evaluation<break/>and automated evaluation metrics</td></tr><tr><td align="left" valign="top">Ding et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="left" valign="top">Taiwan</td><td align="left" valign="top">Conference paper</td><td align="left" valign="top">To develop and evaluate large language multimodal models that integrate clinical notes and laboratory test results for predicting the risk of chronic diseases, particularly type 2 diabetes mellitus</td><td align="left" valign="top">Clinical (Far Eastern Memorial Hospital in Taiwan)</td><td align="left" valign="top">Retrospective cohort study</td><td align="left" valign="top">Automated evaluation metrics</td></tr><tr><td align="left" valign="top">Jairoun et al [<xref ref-type="bibr" rid="ref32">32</xref>]</td><td align="left" valign="top">Malaysia</td><td align="left" valign="top">Journal<break/>article</td><td align="left" valign="top">To investigate the benefits and risks associated with the application of ChatGPT in managing diabetes and metabolic illnesses</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Qualitative study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Mondal and Naskar [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">India</td><td align="left" valign="top">Journal article</td><td align="left" valign="top">To evaluate GPT-4&#x2019;s competency in reviewing diabetic patient management plans compared to expert reviews.</td><td align="left" valign="top">General medical setting</td><td align="left" valign="top">Comparative study</td><td align="left" valign="top">Human evaluation and automated evaluation metrics</td></tr><tr><td align="left" valign="top">Liu et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">N/S<sup><xref 
ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">Journal article</td><td align="left" valign="top">To leverage LLMs and multi-prompt engineering for chronic disease management, specifically for detecting mental disorders through user-generated textual content.</td><td align="left" valign="top">Online platforms</td><td align="left" valign="top">Experimental<break/>(few-shot learning)</td><td align="left" valign="top">Automated evaluation metrics</td></tr><tr><td align="left" valign="top">Liao et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Taiwan</td><td align="left" valign="top">Conference paper</td><td align="left" valign="top">To develop an EHR-based chronic disease prediction platform using LLMs for diabetes, heart disease, and hypertension.</td><td align="left" valign="top">Clinical (Far Eastern Memorial Hospital, Taiwan)</td><td align="left" valign="top">Retrospective cohort study</td><td align="left" valign="top">Automated evaluation metrics</td></tr><tr><td align="left" valign="top">Ding et al [<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">Taiwan</td><td align="left" valign="top">Journal article</td><td align="left" valign="top">Predict new-onset type 2 diabetes using large language multimodal models with EHR data</td><td align="left" valign="top">Clinical (Far Eastern Memorial Hospital, Taiwan)</td><td align="left" valign="top">Retrospective cohort study</td><td align="left" valign="top">Automated evaluation metrics</td></tr><tr><td align="left" valign="top">Dao et al [<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">Ireland and Singapore</td><td align="left" valign="top">Conference paper</td><td align="left" valign="top">Design and evaluate an AI chatbot system using GPT-3.5 for proactive diabetes prevention</td><td align="left" valign="top">Community-based setting</td><td align="left" valign="top">Experimental<break/>(AI 
design and evaluation)</td><td align="left" valign="top">Automated evaluation metrics</td></tr><tr><td align="left" valign="top">Khan [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">United States</td><td align="left" valign="top">Journal article</td><td align="left" valign="top">Assess the efficacy of ChatGPT in facilitating self-management strategies for diabetic patients.</td><td align="left" valign="top">Outpatient diabetes care</td><td align="left" valign="top">Observational study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Mondal et al [<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">India</td><td align="left" valign="top">Journal article</td><td align="left" valign="top">To evaluate the effectiveness of ChatGPT, an LLM, in providing answers to queries related to lifestyle-related diseases or disorders</td><td align="left" valign="top">Clinical and academic settings</td><td align="left" valign="top">Observational study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Young et al [<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">United States</td><td align="left" valign="top">Journal article</td><td align="left" valign="top">Assess LLMs&#x2019; capacity to deliver age-appropriate explanations of chronic pediatric conditions to enhance patient understanding.</td><td align="left" valign="top">Clinical (Boston Children&#x2019;s Hospital)</td><td align="left" valign="top">Pilot study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Li et al [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">China</td><td align="left" valign="top">Journal article</td><td align="left" valign="top">Develop DeepDR-LLM, an integrated AI system for primary diabetes care and diabetic retinopathy (DR) screening</td><td align="left" 
valign="top">Low-resource primary care settings</td><td align="left" valign="top">Experimental</td><td align="left" valign="top">Human evaluation and automated evaluation metrics</td></tr><tr><td align="left" valign="top">Ying et al [<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">China</td><td align="left" valign="top">Preprint</td><td align="left" valign="top">To evaluate the feasibility and utility of ChatGPT in diabetes education using retrospective and real-world patient questions.</td><td align="left" valign="top">Outpatient setting</td><td align="left" valign="top">Mixed methods</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Li et al [<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">China</td><td align="left" valign="top">Journal article</td><td align="left" valign="top">To evaluate the performance of LLMs in diabetes-related queries and their potential to assist in diabetes training for primary care physicians</td><td align="left" valign="top">Primary diabetes care, endocrinology, and diabetes management.</td><td align="left" valign="top">Prospective study</td><td align="left" valign="top">Human evaluation and automated evaluation metrics</td></tr><tr><td align="left" valign="top">Hussain and Grundy [<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">Australia</td><td align="left" valign="top">Journal article</td><td align="left" valign="top">Evaluate the responses of ChatGPT models to queries from diabetes patients, assessing their accuracy, biases, and limitations in providing self-management advice.</td><td align="left" valign="top">Home care setting</td><td align="left" valign="top">Comparative study</td><td align="left" valign="top">Human evaluation</td></tr><tr><td align="left" valign="top">Wang et al [<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">China</td><td align="left" 
valign="top">Journal article</td><td align="left" valign="top">To evaluate the potential of the RISE framework to improve LLMs&#x2019; performance in accurately and safely responding to diabetes-related inquiries.</td><td align="left" valign="top">Home care setting</td><td align="left" valign="top">Comparative study</td><td align="left" valign="top">Human evaluation</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup> Not available.</p></fn><fn id="table1fn2"><p><sup>b</sup> LLM: large language model.</p></fn></table-wrap-foot></table-wrap><p>As shown in <xref ref-type="table" rid="table1">Table 1</xref>, most studies were conducted in China (7/29, 24%) and the United States (4/29, 14%), with limited representation of low-resource settings. Experimental designs were predominant (10/29, 35%), and nearly half of the studies (14/29, 48%) focused on diabetes management. The studies primarily focused on adult populations (28/29, 96%), with only 1 study (1/29, 4%) specifically addressing pediatric applications. Human evaluation was the most common evaluation method (16/29, 55%), followed by automated evaluation metrics (10/29, 35%) used in prototype evaluation, with some studies using both approaches (3/29, 10%). Studies were carried out in diverse health care settings, with home care settings (10/29, 35%) and clinical settings (9/29, 31%) being the most common. This diversity in study characteristics reflects the broad application of LLMs across various health care contexts for chronic disease management.</p></sec><sec id="s3-3"><title>Methodological Quality Assessment</title><p>Using the Mixed Methods Appraisal Tool [<xref ref-type="bibr" rid="ref26">26</xref>], the studies were categorized as quantitative descriptive studies (19/29, 66%), comprising observational, cross-sectional, case study, system development, prototype evaluation, and comparative analyses. 
Quantitative nonrandomized studies (3/29, 10%) included retrospective cohort and quasi-experimental designs; the remaining studies were qualitative (5/29, 17%) or used a mixed methods design (2/29, 7%). Of the 29 studies, 18 studies (62.1%) were classified as high quality (meeting &#x2265;4 of 5 criteria), 9 studies (31.1%) as moderate quality (meeting 2&#x2010;3 criteria), and 2 studies (6.9%) as low quality (meeting &#x2264;1 criterion). The most common methodological limitations identified across studies included inadequate sampling strategy descriptions, limited participant demographic reporting, use of synthetic clinical data, and lack of external validation. Detailed quality appraisal results for each study are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. <xref ref-type="table" rid="table2">Table 2</xref> provides an overview of the LLM types, users, tasks, and challenges identified across the included studies.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Large language model tasks and challenges (N=29).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Study</td><td align="left" valign="bottom">LLM<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">Users</td><td align="left" valign="bottom">Disease management tasks</td><td align="left" valign="bottom">Challenges</td></tr></thead><tbody><tr><td align="left" valign="top">Montagna et al [<xref ref-type="bibr" rid="ref6">6</xref>]</td><td align="left" valign="top">GPT-3</td><td align="left" valign="top">Individuals with hypertension</td><td align="left" valign="top">Patient engagement<break/>blood pressure monitoring and management.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The incorrect way patients measure their blood pressure</p></list-item></list></td></tr><tr><td align="left" valign="top">Yang et al [<xref 
ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">ChatGLM-6B</td><td align="left" valign="top">Patients diagnosed with diabetes</td><td align="left" valign="top">Treatment recommendations, suggesting appropriate laboratory tests, and medication</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Inadequate understanding of complex medical records</p></list-item><list-item><p>The small size of the training data</p></list-item></list></td></tr><tr><td align="left" valign="top">Raghu et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">ChatGPT</td><td align="left" valign="top">Practitioner (ophthalmologist)</td><td align="left" valign="top">Patient education, medical reports: diagnoses and predictions</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Incorrect information, privacy and protection of patient data</p></list-item></list></td></tr><tr><td align="left" valign="top">Song et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">ChatGPT<break/>Llama</td><td align="left" valign="top">Individuals who have used LLM chatbots for mental health support</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Providing emotional support, engaging in therapeutic conversations, and offering recommendations tailored to individual contexts.</p></list-item><list-item><p>Addressing specific stressors or challenges faced by individuals.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Cultural Misalignments: Participants noted that recommendations from LLM chatbots often felt like they were translated from stereotypical American responses.</p></list-item><list-item><p>Linguistic Biases: Participants often felt compelled to use English when interacting with LLM chatbots.</p></list-item><list-item><p>Therapeutic Misalignment.</p></list-item></list></td></tr><tr><td align="left" 
valign="top">Liu et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Gemma-7B, Mistral-7B, Yi-6B, MedAlpaca-7B, LLaMA2-7B, LLaMA3-8B, Qwen2-7B, PalmyraMed-20B, PMCLLaMA13B, OpenBioLLM-8B</td><td align="left" valign="top">Patients with hypertension</td><td align="left" valign="top">Cardiovascular disease management via cuffless blood pressure measurement.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Dataset imbalances diminish accuracy</p></list-item><list-item><p>Privacy concerns in real-world deployment</p></list-item><list-item><p>Need for calibration to mitigate individual variability</p></list-item></list></td></tr><tr><td align="left" valign="top">Ogundare et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">Unspecified</td><td align="left" valign="top">Sickle cell patients and clinicians</td><td align="left" valign="top">Assessing anemia severity in real-time, predicting time to vaso-occlusive episodes, and communicating with emergency personnel.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Creation of a reliable non-invasive tool for angiogenic level assessment, development of a biophysics model, and practical considerations of LLM communication with emergency personnel</p></list-item></list></td></tr><tr><td align="left" valign="top">Cankurtaran et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">ChatGPT</td><td align="left" valign="top">Inflammatory bowel disease patients and health care professionals.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Tailored responses, educational resources</p></list-item><list-item><p>monitoring and follow-up, patient empowerment</p></list-item><list-item><p>Decision support</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Insufficient responses</p></list-item><list-item><p>Limited scope of 
knowledge (up-to-date information)</p></list-item></list></td></tr><tr><td align="left" valign="top">Wang et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">LLaMA-7B, ChatGLM-6B<break/>Alpaca.</td><td align="left" valign="top">Individuals with depression.</td><td align="left" valign="top">Diagnosis and treatment of depression</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Absence of pretraining data sets on depression</p></list-item><list-item><p>Hallucination problem</p></list-item><list-item><p>Evaluation methodologies emphasize predictive performance and lack quantification of the impact on patient treatment</p></list-item></list></td></tr><tr><td align="left" valign="top">Abdullahi et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">Bard, ChatGPT-3.5, and GPT-4</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Physicians</p></list-item><list-item><p>Medical Students Resident Nurses</p></list-item><list-item><p>Nurse Practitioners</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Clinical Decision Support</p></list-item><list-item><p>Medical Education</p></list-item><list-item><p>Disease Diagnosis</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Inconsistency in Responses, LLMs do not always explicitly indicate their level of uncertainty due to Limited Scope, Sample Size, and knowledge. 
ChatGPT-3.5 and GPT-4 were limited to health care data available up to 2021</p></list-item><list-item><p>LLMs may generate different responses for the same prompt</p></list-item></list></td></tr><tr><td align="left" valign="top">Al Anezi [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">ChatGPT</td><td align="left" valign="top">Outpatients with chronic diseases</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Providing information about patient conditions, treatment plans, and medication schedules. Reminders for medication intake, appointments, or lifestyle adjustments.</p></list-item><list-item><p>Assisting in behavior change efforts by providing evidence-based strategies, personalized goal-setting techniques, and reminders for healthy habits.</p></list-item><list-item><p>Identifying barriers to behavior change and exploring solutions to overcome them.</p></list-item><list-item><p>Monitoring blood pressure, blood glucose levels, or weight and providing feedback based on shared data.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Limited physical examination, Lack of human connection and empathy</p></list-item><list-item><p>Complexity of individual cases</p></list-item><list-item><p>Privacy and security concerns</p></list-item><list-item><p>Legal and ethical challenges, language and cultural barriers, technical limitations, diagnostic limitations, and lack of reliability and trust</p></list-item><list-item><p>Ineffectiveness in emergencies</p></list-item></list></td></tr><tr><td align="left" valign="top">Athavale et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">ChatGPT 4.0</td><td align="left" valign="top">Patient</td><td align="left" valign="top">Answered administrative and non-complex medical questions well, and electronic health record inbox management. 
Answering complex medical questions</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Hallucinations</p></list-item><list-item><p>Need for extensive supervised training by subject experts</p></list-item><list-item><p>No regulatory approval</p></list-item></list></td></tr><tr><td align="left" valign="top">Soto-Ch&#x00E1;vez et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">ChatGPT</td><td align="left" valign="top">Patients with chronic diseases using the Spanish language</td><td align="left" valign="top">Evaluating the reliability and readability of ChatGPT-generated patient information on chronic diseases in Spanish.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT was trained in English, which affects the accuracy of responses in Spanish</p></list-item><list-item><p>Lower reliability on chronic diseases like heart failure and chronic kidney disease</p></list-item></list></td></tr><tr><td align="left" valign="top">Abbas et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">GPT-3.5</td><td align="left" valign="top">Machine Learning engineers, clinical researchers</td><td align="left" valign="top">Chronic disease prediction</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>lack of longitudinal data</p></list-item><list-item><p>limited generalizability</p></list-item></list></td></tr><tr><td align="left" valign="top">Anderson et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">GPT (Generative Pre-trained Transformer)</td><td align="left" valign="top">Practitioners</td><td align="left" valign="top">Discover and rank novel relationships between various aspects of chronic lower back pain.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The GPT-based approach took around half an hour to process approximately 500 pairs, making it computationally 
intensive.</p></list-item><list-item><p>Achieving strong agreement among human evaluators</p></list-item></list></td></tr><tr><td align="left" valign="top">Ding et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="left" valign="top">MedAlpaca</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Patients with early diabetes</p></list-item><list-item><p>Patients with multiclass chronic diseases</p></list-item></list></td><td align="left" valign="top">Early prediction of diabetes<break/>Prediction of multiclass chronic diseases</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Lower positive rates when using only laboratory blood values</p></list-item><list-item><p>Missing tests for most patients when using only laboratory blood values.</p></list-item><list-item><p>Integrating multimodal data from clinical notes and laboratory test results</p></list-item><list-item><p>Difficulty in model explainability for early disease prediction</p></list-item></list></td></tr><tr><td align="left" valign="top">Jairoun et al [<xref ref-type="bibr" rid="ref32">32</xref>]</td><td align="left" valign="top">ChatGPT</td><td align="left" valign="top">diabetes and metabolic illnesses, endocrinologists and diabetologists</td><td align="left" valign="top">Patient support and education<break/>Tailored treatment</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Diagnostic mistakes</p></list-item><list-item><p>Patient data security and privacy</p></list-item><list-item><p>Limitations on generalizability</p></list-item><list-item><p>Integration difficulties and workflow errors, and Compliance with laws and regulations</p></list-item><list-item><p>Absence of empathy and human contact</p></list-item></list></td></tr><tr><td align="left" valign="top">Mondal et al [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">GPT-4</td><td align="left" valign="top">Health care professionals</td><td 
align="left" valign="top">Reviewing and evaluating diabetes management plans for guideline adherence</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>GPT-4&#x2019;s difficulties in handling complex clinical judgments, such as medication adjustments and treatment modifications in varied clinical scenarios</p></list-item></list></td></tr><tr><td align="left" valign="top">Liu et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">GPT-2 and T5</td><td align="left" valign="top">Patients with mental disorders</td><td align="left" valign="top">Detection of mental disorders (depression, anorexia, pathological gambling, self-harm) through user-generated textual content.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Need for personalized prompts to capture individual user characteristics.</p></list-item><list-item><p>Integration of medical knowledge into prompts for accurate detection.</p></list-item><list-item><p>Handling noisy and lengthy user-generated content. 
Few-shot learning with minimal labeled data</p></list-item></list></td></tr><tr><td align="left" valign="top">Liao et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">BERT<break/>BiomedBERT<break/>Flan-T5-large-770M GPT-2</td><td align="left" valign="top">Physicians, health care providers</td><td align="left" valign="top">Prediction of chronic diseases (diabetes, heart disease, hypertension) using EHR data.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Difficulty in classifying diseases with lower positive rates (eg, hypertension)</p></list-item><list-item><p>Need for interpretability in model predictions</p></list-item><list-item><p>Integration of multimodal data (clinical notes and blood test results) for accurate predictions</p></list-item></list></td></tr><tr><td align="left" valign="top">Ding et al [<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">BERT, Roberta, BiomedBERT, Flan-T5, GPT-2</td><td align="left" valign="top">Researchers, health care professionals</td><td align="left" valign="top">Predict new-onset T2DM, early detection, and risk assessment</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Handling multimodal data, missing values, and model interpretability</p></list-item></list></td></tr><tr><td align="left" valign="top">Dao et al [<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">GPT-3.5</td><td align="left" valign="top">Individuals at risk of diabetes or with prediabetes</td><td align="left" valign="top">Instant Q&#x0026;A and advice Personalized reminders Data analysis for tailored guidance Health resource aggregation Emotional support</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Engagement barriers in prevention programs (eg, transportation, personal responsibilities)</p></list-item><list-item><p>Lack of research on AI in diabetes prevention</p></list-item><list-item><p>Need 
for reliable, context-aware AI responses</p></list-item></list></td></tr><tr><td align="left" valign="top">Khan [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">ChatGPT</td><td align="left" valign="top">Diabetic patients</td><td align="left" valign="top">Real-time education and support Blood glucose monitoring guidance Medication adherence advice Lifestyle/diet recommendations Emergency detection</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Inaccuracies in medical information (eg, insulin storage guidelines, trial data mix-ups)</p></list-item><list-item><p>Lack of emotional support/empathy</p></list-item><list-item><p>Limited to pre-2021 knowledge</p></list-item><list-item><p>Difficulty distinguishing medical terminologies</p></list-item><list-item><p>Low adoption among older adults</p></list-item></list></td></tr><tr><td align="left" valign="top">Mondal et al [<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">ChatGPT-4</td><td align="left" valign="top">Patients and health care professionals</td><td align="left" valign="top">Answering patient questions (causes, symptoms, treatment, diet) Providing information on managing Crohn&#x2019;s disease (CD) and ulcerative colitis (UC).<break/>Addressing professional queries (classification, diagnosis, disease activity, prognostic markers, complications)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Insufficient elaboration on medical agents and surgical indications</p></list-item><list-item><p>Inadequate information for patients. 
ChatGPT provided different answers to the same question across sessions</p></list-item><list-item><p>Lower reliability/usefulness scores for patient-directed questions compared to professional-focused ones.</p></list-item><list-item><p>Outdated information</p></list-item></list></td></tr><tr><td align="left" valign="top">Young et al [<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">GPT-4 Gemini 1.0 Ultra</td><td align="left" valign="top">Pediatric patients, health care providers, and caregivers</td><td align="left" valign="top">Generating explanations for chronic conditions</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Age-appropriateness discrepancies between models (GPT-4 versus Gemini)</p></list-item><list-item><p>Lack of direct feedback from pediatric patients; reliance on clinician evaluations.</p></list-item></list></td></tr><tr><td align="left" valign="top">Li et al [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">LLaMA</td><td align="left" valign="top">Primary care physicians</td><td align="left" valign="top">Individualized diabetes management recommendations</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Underdiagnosis and poor primary diabetes management</p></list-item></list></td></tr><tr><td align="left" valign="top">Ying et al [<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">GPT-3.5</td><td align="left" valign="top">Physicians, laypersons, and type 2 diabetes patients</td><td align="left" valign="top">Diabetes education and personalized Q&#x0026;A support</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Lower real-world performance, variability by prompt, trust, and safety concerns</p></list-item></list></td></tr><tr><td align="left" valign="top">Li et al [<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">ChatGPT-3.5<break/>ChatGPT-4.0<break/>Google 
Bard<break/>MedGPT<break/>LLaMA2-7B</td><td align="left" valign="top">Researchers<break/>Primary care physicians</td><td align="left" valign="top">Answering diabetes-related exam questions and assisting in diabetes training.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Poor performance in both Chinese and English diabetes-related questions</p></list-item><list-item><p>LLMs may provide misleading explanations and have difficulty with multiple-choice and case analysis questions</p></list-item></list></td></tr><tr><td align="left" valign="top">Hussain and Grundy [<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">ChatGPT-3.5<break/>ChatGPT-4</td><td align="left" valign="top">Diabetes patients and health care providers</td><td align="left" valign="top">Patient education, treatment recommendations, insulin management, dietary advice</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Inaccuracies in medical advice</p></list-item><list-item><p>Lack of personalization</p></list-item><list-item><p>Failure to recognize regional variations</p></list-item><list-item><p>Incorrect assumptions about blood glucose units; limitations in addressing complex patient histories</p></list-item></list></td></tr><tr><td align="left" valign="top">Wang et al [<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">GPT-4<break/>Anthropic Claude 2<break/>Google Bard</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Clinicians</p></list-item><list-item><p>Diabetes patients</p></list-item></list></td><td align="left" valign="top">Responding to diabetes-related inquiries and providing accurate and comprehensive information for diabetes self-management.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Lack of specialized medical knowledge in commercially available LLMs</p></list-item><list-item><p>Susceptibility to generating inaccurate or misleading 
information</p></list-item><list-item><p>Need for real-time, domain-specific knowledge to improve accuracy and reliability</p></list-item><list-item><p>Ensuring responses are safe, accurate, and understandable for patients</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup> LLM: large language model.</p></fn></table-wrap-foot></table-wrap><p>As shown in <xref ref-type="table" rid="table2">Table 2</xref>, GPT models were the most commonly used (14/29, 48%), followed by LLaMA variants (5/29, 17%), the Bard model (3/29, 10%), and BERT-based models (2/29, 7%). LLMs were primarily used for patient education and information provision (18/29, 62%), with most studies targeting patients (18/29, 62%) rather than health care providers (11/29, 38%). Inaccuracies and inconsistencies in responses (18/29, 62%) were the most frequently reported challenges across studies.</p></sec><sec id="s3-4"><title>Objective 1: The Tasks in Chronic Disease Management Performed by LLMs</title><p>Our literature synthesis revealed that LLMs have significant potential to improve various chronic disease management tasks. 
The tasks identified have been broadly categorized into patient-centered tasks and practitioner-centered tasks.</p></sec><sec id="s3-5"><title>Patient-Centered Tasks</title><sec id="s3-5-1"><title>Patient Education and Information Provision</title><p>Eighteen studies (n=18) delved into the use of LLMs in providing health information to enhance patient health literacy [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref41">41</xref>]. Key applications included using ChatGPT to provide personalized guidance on diabetes management [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], educational content for diabetic retinopathy and inflammatory bowel disease patients [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>], generating age-appropriate explanations of chronic pediatric conditions [<xref ref-type="bibr" rid="ref40">40</xref>], and supporting physician training in diabetes management [<xref ref-type="bibr" rid="ref43">43</xref>]. In addition, LLMs supported treatment adherence through tools like ChatGPT and GPT-3.5, offering tailored medication reminders, appointment scheduling, and strategies for behavior change [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. 
For Spanish-speaking populations, ChatGPT was evaluated for reliability and readability of chronic disease information [<xref ref-type="bibr" rid="ref28">28</xref>], while multiple ChatGPT versions were assessed for regional variations in diabetes education quality [<xref ref-type="bibr" rid="ref44">44</xref>].</p></sec><sec id="s3-5-2"><title>Diagnosis and Treatment</title><p>Six studies (n=6) examined the role of LLMs in assisting with diagnosis and treatment recommendations. These studies explored the potential of LLMs to suggest appropriate laboratory tests, generate differential diagnoses, and propose medication options tailored to the individual patient&#x2019;s condition [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. Notable applications included enhancing depression diagnosis and treatment through fine-tuning of models like LLaMA-7B and ChatGLM-6B [<xref ref-type="bibr" rid="ref22">22</xref>], supporting the diagnosis of rare and complex diseases [<xref ref-type="bibr" rid="ref23">23</xref>], and detecting mental disorder patterns through analysis of user-generated content [<xref ref-type="bibr" rid="ref34">34</xref>]. Furthermore, integrating AI-driven diagnostic and treatment capabilities with diabetes management systems showed particular promise in low-resource primary care settings [<xref ref-type="bibr" rid="ref41">41</xref>].</p></sec><sec id="s3-5-3"><title>Self-Management and Disease Monitoring</title><p>Eight studies (n=8) addressed using LLMs for self-management and disease monitoring. 
These studies explored how LLMs provide guidance on managing chronic conditions, promote patient engagement, and support home disease monitoring [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Key applications included developing chatbot architectures for home blood pressure monitoring [<xref ref-type="bibr" rid="ref6">6</xref>], creating cuffless blood pressure measurement systems using wearable biosignals [<xref ref-type="bibr" rid="ref19">19</xref>], and assessing real-time disease severity in sickle cell anemia [<xref ref-type="bibr" rid="ref20">20</xref>]. LLMs also demonstrated value in detecting emergencies such as hypoglycemic episodes in diabetic patients and guiding appropriate actions [<xref ref-type="bibr" rid="ref38">38</xref>]. Additional applications encompassed monitoring tools for inflammatory bowel disease management [<xref ref-type="bibr" rid="ref21">21</xref>], personalized reminders for diabetes prevention [<xref ref-type="bibr" rid="ref37">37</xref>], and comprehensive health parameter tracking with feedback based on patient-shared data [<xref ref-type="bibr" rid="ref24">24</xref>]. These implementations highlight the potential of LLMs to enhance patient self-management through continuous monitoring and timely intervention guidance.</p></sec><sec id="s3-5-4"><title>Emotional Support and Therapeutic Conversations</title><p>Four studies (n=4) explored the role of LLMs in providing emotional support and engaging in therapeutic conversations for patients managing chronic diseases. 
The review identified several key applications, including investigating LLM chatbots for mental health support with tailored recommendations addressing specific stressors [<xref ref-type="bibr" rid="ref18">18</xref>], evaluating ChatGPT as a virtual health coach identifying barriers to behavior change [<xref ref-type="bibr" rid="ref24">24</xref>], assessing GPT-3.5&#x2019;s emotional support capabilities in proactive diabetes prevention [<xref ref-type="bibr" rid="ref37">37</xref>], and examining ChatGPT&#x2019;s ability to provide coping strategies for diabetic patients [<xref ref-type="bibr" rid="ref38">38</xref>].</p></sec></sec><sec id="s3-6"><title>Practitioner-Centered Tasks</title><sec id="s3-6-1"><title>Clinical Decision Support</title><p>Eight studies (n=8) investigated the use of LLMs for clinical decision support. The review identified several key applications, including generating personalized medical reports with treatment options and diagnostic procedures for conditions like diabetic retinopathy [<xref ref-type="bibr" rid="ref17">17</xref>] and inflammatory bowel disease [<xref ref-type="bibr" rid="ref21">21</xref>], assessing LLMs&#x2019; diagnostic accuracy compared with human experts in rare and complex diseases [<xref ref-type="bibr" rid="ref23">23</xref>], and exploring potential use for electronic health record inbox management [<xref ref-type="bibr" rid="ref27">27</xref>]. 
Other applications included using GPT to discover and rank novel relationships between aspects of chronic lower back pain [<xref ref-type="bibr" rid="ref30">30</xref>], evaluating diabetes management plans using GPT-4 [<xref ref-type="bibr" rid="ref33">33</xref>], disease classification and prognosis [<xref ref-type="bibr" rid="ref39">39</xref>], and evaluating LLMs&#x2019; competency in answering diabetes-related exam questions for physician training [<xref ref-type="bibr" rid="ref43">43</xref>].</p></sec><sec id="s3-6-2"><title>Medical Predictions</title><p>Six studies (n=6) explored the predictive capabilities of LLMs in chronic disease management. The review identified several key applications, including predicting diabetic retinopathy risk [<xref ref-type="bibr" rid="ref17">17</xref>], developing ChatGPT-assisted machine learning models for chronic disease classification [<xref ref-type="bibr" rid="ref29">29</xref>], and integrating multimodal data from electronic health records and laboratory tests to predict new-onset type 2 diabetes [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Additional applications included creating an EHR-based prediction platform for diabetes, heart disease, and hypertension [<xref ref-type="bibr" rid="ref35">35</xref>] and implementing integrated AI systems for diabetes risk assessment in primary care settings [<xref ref-type="bibr" rid="ref41">41</xref>].</p><p>LLMs in chronic disease management are predominantly utilized for patient education and information provision, accounting for 62% (18/29) of reported applications. Self-management and disease monitoring and clinical decision support each account for 28% (8/29) of applications. Diagnosis and treatment tasks, along with medical predictions, both constitute 21% (6/29) of applications, while emotional support and therapeutic conversations account for 14% (4/29). 
Percentages exceed 100% due to thematic overlaps where individual studies addressed multiple tasks.</p></sec></sec><sec id="s3-7"><title>Objective 2: Challenges Associated With Using LLMs for Chronic Disease Management</title><p>The challenges identified and presented in <xref ref-type="table" rid="table2">Table 2</xref> were categorized as follows.</p><sec id="s3-7-1"><title>Inaccuracies and Inconsistencies in Responses</title><p>Eighteen studies (n=18) highlighted issues with hallucinations, diagnostic errors, and unreliable outputs [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref45">45</xref>]. 
The review identified several key challenges, including hallucinations where models like ChatGPT and LLaMA generate reasonable but factually incorrect information [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref45">45</xref>], diagnostic errors in conditions ranging from depression to inflammatory bowel disease [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref44">44</xref>], and inconsistent responses to identical prompts without indicating uncertainty levels [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. In addition, models demonstrated limited understanding of complex medical records [<xref ref-type="bibr" rid="ref16">16</xref>], struggled with regional variations in medical practice [<xref ref-type="bibr" rid="ref44">44</xref>], provided insufficient elaboration on medical treatments [<xref ref-type="bibr" rid="ref39">39</xref>], and showed difficulty distinguishing medical terminologies [<xref ref-type="bibr" rid="ref38">38</xref>]. Further challenges included lower reliability in the Spanish language for specific chronic conditions like heart failure and chronic kidney disease [<xref ref-type="bibr" rid="ref28">28</xref>] as well as limited generalizability due to restricted training populations [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. 
These inaccuracies stem from erroneous input data, such as incomplete or incorrect test results [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p></sec><sec id="s3-7-2"><title>Limited Datasets and Knowledge</title><p>Six studies (n=6) identified challenges related to limited datasets and knowledge cutoffs in LLM applications for chronic disease management. The review highlighted several key limitations, including scarcity of disease-specific datasets [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], dataset imbalances affecting predictions for conditions like hypertension [<xref ref-type="bibr" rid="ref19">19</xref>], and knowledge limitations that restrict LLM awareness of the current medical guidelines [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p></sec><sec id="s3-7-3"><title>Computational and Technical Challenges</title><p>Six studies (n=6) highlight significant computational and technical challenges in deploying LLMs for chronic disease management. The review identified several key limitations, including resource-intensive processing that results in prolonged training time in resource-constrained environments [<xref ref-type="bibr" rid="ref30">30</xref>]. In addition, technical challenges include integrating multimodal data from clinical notes and laboratory results [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], ensuring model explainability for early disease prediction [<xref ref-type="bibr" rid="ref35">35</xref>], and handling noisy user-generated content in mental health applications [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. 
Further challenges involve difficulties in integrating LLMs into clinical workflows [<xref ref-type="bibr" rid="ref32">32</xref>] and managing complex clinical judgments, such as medication adjustments and treatment modifications [<xref ref-type="bibr" rid="ref33">33</xref>].</p></sec><sec id="s3-7-4"><title>Usability and Accessibility Concerns</title><p>Nine studies (n=9) identified usability and accessibility concerns surrounding LLMs in chronic disease management tasks. Notable concerns included the restriction to textual inputs, which limits use in multimodal diagnostic tasks [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], and language and cultural misalignments [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], while age-inappropriate outputs posed challenges for pediatric care [<xref ref-type="bibr" rid="ref40">40</xref>]. In addition, studies reported poor interpretability of model predictions for early disease prediction and risk assessment [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Additional challenges included a lack of empathy and ineffectiveness in emergencies [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref29">29</xref>] and digital literacy gaps restricting adoption among older adults [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. Furthermore, these studies also noted how insufficient transparency in model decision-making processes hindered trust and clinical acceptance [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>].</p></sec><sec id="s3-7-5"><title>LLM Evaluation</title><p>Five studies (n=5) noted challenges involving LLM evaluation. 
Notable challenges identified included that automated evaluation metrics primarily focus on predictive performance and fail to assess the impact on patient treatment outcomes [<xref ref-type="bibr" rid="ref22">22</xref>], difficulties in achieving consensus among human evaluators when assessing LLM outputs [<xref ref-type="bibr" rid="ref30">30</xref>], discrepancies between model performance in test environments versus real-world applications [<xref ref-type="bibr" rid="ref42">42</xref>], difficulties in consistently evaluating language models across different diabetes-related tasks [<xref ref-type="bibr" rid="ref43">43</xref>], and significant variations in age-appropriateness scoring between different LLM platforms [<xref ref-type="bibr" rid="ref40">40</xref>].</p></sec><sec id="s3-7-6"><title>Legal, Ethical, Privacy, and Regulatory Concerns</title><p>Ten studies (n=10) identified legal, ethical, privacy, and regulatory challenges of using LLMs in chronic disease management. The review highlighted several critical concerns, including privacy and data security vulnerabilities [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref32">32</xref>], absence of regulatory approval and standardized guidelines [<xref ref-type="bibr" rid="ref27">27</xref>], and compliance issues with health care laws across different jurisdictions [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. 
In addition, language and cultural barriers posed additional challenges, particularly for non-English speakers [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], while bias and equity issues stemming from limited training data diversity raised concerns about health care disparities [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Studies also noted ethical challenges around accountability for errors [<xref ref-type="bibr" rid="ref24">24</xref>], lack of transparency in decision-making [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>], and limitations in addressing complex ethical dilemmas in clinical care [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>].</p><p>The most prevalent challenge identified was inaccurate and inconsistent responses, reported in 62% (18/29) of studies. Legal, ethical, privacy, and regulatory concerns followed, appearing in 35% (10/29) of studies. Usability and accessibility issues were noted in 31% (9/29) of studies. Computational and technical limitations, as well as dataset and knowledge constraints, were each reported in 21% (6/29) of studies. 
Additionally, 17% (5/29) of studies highlighted limitations in evaluation methodologies.</p></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This scoping review presents 3 significant findings from the 29 included studies (n=29): (1) LLMs are mostly used for both patient-centered (18/29, 62%) and practitioner-centered (11/29, 38%) tasks, with patient education and information provision emerging as the major application (18/29, 62%); (2) despite promising applications, significant challenges still exist, particularly regarding LLM response accuracy (18/29, 62% of studies), ethical concerns (10/29, 35%), and usability issues (9/29, 31%); and (3) methodological quality varies considerably across studies, with journal articles demonstrating higher quality (13/18, 72%) compared with conference papers (3/8, 38%) and preprints (1/3, 33%). These findings highlight both the considerable promise and significant limitations of current LLM applications in chronic disease management, which are examined in detail below.</p></sec><sec id="s4-2"><title>Chronic Disease Management Tasks</title><p>Chronic disease management is an approach to managing chronic illnesses involving screenings, regular check-ups, monitoring, coordination of treatment, medication adherence, lifestyle modifications, and patient education [<xref ref-type="bibr" rid="ref1">1</xref>]. 
The findings from this scoping review reveal an increasing interest in leveraging LLMs like ChatGPT to support both patient-centered and practitioner-centered tasks [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p></sec><sec id="s4-3"><title>Patient-Centered Tasks</title><p>The majority of the studies (18/29, 62%) focused on patient-centered tasks, reflecting the emphasis on patient active engagement in chronic disease management [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Patients&#x2019; active engagement enables them to monitor their symptoms, disease progress, weight, and adverse drug effects, and adhere to medication and visits [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. This review found that LLMs support various patient-centered applications, including patient education and information provision, disease monitoring and self-management, emotional support and therapeutic conversations, and diagnosis and treatment assistance.</p><p>Patient education and information provision emerged as the most prominent application (18/29, 62%), with LLMs providing health information about conditions, treatment plans, and medication schedules [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref41">41</xref>]. 
With the right health information, individuals with chronic diseases can easily self-manage their conditions. Diagnosis and treatment applications were reported in 6 of the 29 studies (6/29, 21%). Studies experimented with using LLMs to suggest laboratory tests, support diagnosis, and generate medication recommendations tailored to individual patient conditions [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. However, using LLMs for diagnosis and treatment has been criticized due to concerns about hallucinations and misinterpretations of clinical guidelines [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>], highlighting the need for continued research to ensure patient safety. Therefore, LLMs should not replace health practitioners but instead serve as complementary tools.</p><p>Self-management and disease monitoring applications (8/29, 28%) demonstrated how LLMs can facilitate home-based monitoring of various physiological parameters [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Recent studies have highlighted that patient engagement with health monitoring technologies is crucial for improving health outcomes in chronic disease management [<xref ref-type="bibr" rid="ref47">47</xref>]. Studies have also shown that wearable technologies integrated with LLMs provide real-time patient-centered health data that can better inform self-management decision-making [<xref ref-type="bibr" rid="ref48">48</xref>]. 
However, ensuring consistent long-term engagement is still a challenge.</p><p>Emotional support and therapeutic conversations (4/29, 14%) represented an emerging application area [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. Studies showed that LLMs can provide psychological support through tailored recommendations addressing specific stressors [<xref ref-type="bibr" rid="ref18">18</xref>], identifying barriers to behavior change [<xref ref-type="bibr" rid="ref24">24</xref>], and offering coping strategies for patients with diabetes [<xref ref-type="bibr" rid="ref38">38</xref>]. Emotional support is increasingly recognized as essential in chronic disease management [<xref ref-type="bibr" rid="ref49">49</xref>], as it helps patients overcome psychological barriers to treatment adherence and lifestyle modifications.</p></sec><sec id="s4-4"><title>Practitioner-Centered Tasks</title><p>Practitioner-centered tasks (11/29, 38%) mainly revolved around clinical decision support and medical predictions. Clinical decision support applications (8/29, 28%) provided health care practitioners with actionable information to enhance decision-making. Studies demonstrated that LLMs can generate personalized medical reports, generate treatment recommendations, and support diagnostic processes that assist health care specialists in making informed decisions [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. However, while LLMs enhance diagnostic efficiency, concerns regarding inconsistent outputs pose barriers to clinical adoption. Medical prediction applications account for 21% (6/29) of LLM use in chronic disease management, showing strong potential for early disease detection and risk stratification. 
By integrating structured and unstructured clinical data, such as lab results, clinical notes, and imaging, LLMs enable more comprehensive and accurate predictive models compared with traditional methods [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>].</p><p>Notably, real-time risk assessment tools, like the ambulatory device developed for sickle cell anemia management [<xref ref-type="bibr" rid="ref20">20</xref>], demonstrate how LLMs can predict complications before symptoms appear. However, challenges relating to medical accuracy still limit their seamless integration into clinical workflows [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. A significant emerging trend is the development of retrieval-augmented generation (RAG) frameworks that enhance prediction accuracy by dynamically incorporating up-to-date medical knowledge [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref53">53</xref>]. Future advancements should focus on seamless integration of LLMs with existing medical systems, wearable health technologies, and mobile health applications to improve interoperability and trustworthiness.</p></sec><sec id="s4-5"><title>Methodological Quality and Strength of Evidence</title><p>The methodological quality assessment revealed notable trends that should be considered while interpreting the results of this scoping review. 
High-quality studies (18/29, 62%) were not uniformly distributed across LLM application tasks; studies examining medical prediction applications [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>] and patient education and information provision [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>] had significantly stronger methodological rigor. Studies investigating emotional support showed mixed quality, with some high-quality qualitative research [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref24">24</xref>] and a quantitative descriptive study [<xref ref-type="bibr" rid="ref37">37</xref>] alongside a moderate-quality mixed study [<xref ref-type="bibr" rid="ref38">38</xref>]. Notably, self-management and disease monitoring studies showed significant methodological heterogeneity, with quality ratings ranging from low to high.</p><p>Journal articles demonstrated a substantially higher proportion of high-quality studies (13/18, 72%) compared with conference papers (3/8, 38%) and preprints (1/3, 33%), suggesting that peer-review processes enhance methodological quality. The identified common limitations, including inadequate sampling strategies, limited participant demographic reporting, and insufficient methodological transparency, were more prevalent in lower-tier publication sources, including conference papers and preprints. Quantitative descriptive studies, especially those focused on system design and prototype testing (10/29, 34.5%), showed mixed quality ratings ranging from low to high, with common limitations being the use of synthetic data and a lack of clinical validation, as they commonly prioritized technical utility. 
These methodological patterns significantly influence the reliability of the findings.</p></sec><sec id="s4-6"><title>Challenges and Corresponding Recommendations</title><p>This section discusses the key challenges identified in the review and presents corresponding recommendations to mitigate these issues. Each challenge is followed by practical solutions to enhance the applicability of LLMs in chronic disease management.</p></sec><sec id="s4-7"><title>Inaccurate Data and Inconsistencies in Responses</title><p>Inaccurate and inconsistent responses emerged as a significant challenge (18/29, 62%), primarily due to poor data quality, inherent biases in training datasets, and the limited scope of knowledge constrained by the model&#x2019;s training cutoff [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Given that LLM performance is intrinsically linked to data quality, flawed datasets inevitably propagate errors in outputs, a manifestation of the &#x201C;garbage in, garbage out&#x201D; principle [<xref ref-type="bibr" rid="ref54">54</xref>-<xref ref-type="bibr" rid="ref56">56</xref>]. The issue of biases in AI models has garnered significant attention in recent literature [<xref ref-type="bibr" rid="ref54">54</xref>-<xref ref-type="bibr" rid="ref56">56</xref>], prompting possible mitigation strategies [<xref ref-type="bibr" rid="ref57">57</xref>-<xref ref-type="bibr" rid="ref59">59</xref>]. These limitations carry critical implications for health care, as erroneous LLM outputs may lead to incorrect clinical decisions, posing significant risks to patient safety [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. 
Hence, researchers are exploring technical solutions such as advancing domain-specific fine-tuning techniques [<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref61">61</xref>], leveraging retrieval-augmented generation (RAG) frameworks [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref52">52</xref>], and refining outputs through reinforcement learning (RL) and prompt engineering techniques [<xref ref-type="bibr" rid="ref62">62</xref>-<xref ref-type="bibr" rid="ref64">64</xref>]. In addition, implementing expert validation protocols has emerged as a crucial safeguard to ensure adherence to evidence-based practice.</p></sec><sec id="s4-8"><title>Limited Datasets</title><p>The scarcity of high-quality datasets for chronic diseases was reported in 6 of the 29 studies (6/29, 21%). Studies highlighted limitations and narrow coverage of publicly accessible clinical training datasets due to data privacy and institutional restrictions [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Given that experimental studies often require substantial model training datasets, the absence of adequate data poses a significant challenge to the effectiveness and success of these studies. Therefore, to address this gap, studies have explored synthetic datasets and data augmentation techniques [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref65">65</xref>]. However, these methods risk amplifying pre-existing biases in source data [<xref ref-type="bibr" rid="ref54">54</xref>-<xref ref-type="bibr" rid="ref56">56</xref>]. 
Therefore, dataset validation is essential to ensure quality. In addition, collaborative partnerships with health care institutions to access real-world clinical datasets and knowledge distillation techniques, where smaller models are trained on outputs from larger, clinically validated models to reduce dependency on raw data volume, can be explored [<xref ref-type="bibr" rid="ref66">66</xref>].</p></sec><sec id="s4-9"><title>Computational Resources and Technical Challenges</title><p>The computational demands of training LLMs for health care applications remain a significant challenge (6/29, 21%). Consequently, resource-efficient approaches, such as quantization and parameter-efficient fine-tuning (PEFT) techniques like Low-Rank Adaptation (LoRA), Quantized LoRA (QLoRA), Weight-Decomposed Low-Rank Adaptation, and ReFT [<xref ref-type="bibr" rid="ref67">67</xref>-<xref ref-type="bibr" rid="ref73">73</xref>], are evolving and are popularly used in fine-tuning LLMs in low-resource computing environments. In addition, adapter-based tuning methods provide lightweight alternatives by injecting trainable adapter layers into frozen pretrained models, enabling task-specific fine-tuning without updating the entire model parameters [<xref ref-type="bibr" rid="ref74">74</xref>,<xref ref-type="bibr" rid="ref75">75</xref>]. Building on these advances, the development of lightweight LLMs optimized for mobile devices presents a promising direction for extending AI-based chronic disease management to resource-constrained settings [<xref ref-type="bibr" rid="ref76">76</xref>].</p></sec><sec id="s4-10"><title>Usability and Accessibility Concerns</title><p>Usability and accessibility concerns were reported in 9 of the 29 studies (9/29, 31%), including issues with text-only interfaces for some LLMs, cultural misalignments, and outputs ill-suited for pediatric or elderly populations [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. 
While studies highlighted text-only interfaces as a critical limitation of LLMs in health care [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], recent advances in multimodal architectures have addressed this gap. These models now integrate and interpret diverse data modalities, including medical images, audio, and structured documents, while generating composite textual and visual outputs [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref77">77</xref>-<xref ref-type="bibr" rid="ref79">79</xref>]. In addition, dynamic and simplified interfaces that accommodate low-digital-literacy users, pediatric users, and different cultural and language settings could further improve LLM usability and adaptation.</p></sec><sec id="s4-11"><title>Legal, Ethical, and Regulatory Issues</title><p>Legal, ethical, and regulatory concerns (10/29, 35%) remain a key issue in LLM adoption in health care. Studies identified data privacy, biases, misinformation, responsibility, and accountability for LLM-generated content as key concerns. Although several AI frameworks have been proposed, the absence of standardized guidelines and regulatory approvals creates significant gaps [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref80">80</xref>-<xref ref-type="bibr" rid="ref82">82</xref>]. This regulatory vacuum risks inconsistent model development and validation practices, unaddressed ethical dilemmas regarding accountability for AI-generated recommendations, and potential mismatches between rapidly evolving LLM capabilities and static health care regulations [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref83">83</xref>]. Addressing these ethical concerns requires standard guidelines and rules to ensure responsible use in health care settings [<xref ref-type="bibr" rid="ref84">84</xref>]. 
Therefore, future efforts must prioritize the development of a comprehensive regulatory framework.</p></sec><sec id="s4-12"><title>LLM Evaluation</title><p>Evaluation gaps were reported in 5 of 29 studies (17%), reflecting critical shortcomings in current methodologies for assessing LLM performance in clinical contexts. Existing automated evaluations mainly focus on predictive performance, using metrics such as accuracy and <italic>F</italic><sub>1</sub>-scores that lack medical and treatment knowledge [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref85">85</xref>]. These metrics may produce misleading conclusions if not appropriately selected [<xref ref-type="bibr" rid="ref85">85</xref>]. Furthermore, human evaluation, although valuable, introduces subjectivity and interrater variability, and the absence of a standardized LLM evaluation framework makes attaining consensus among human raters challenging [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref85">85</xref>]. A promising direction involves a hybrid evaluation approach that integrates human expert reviews with automated metrics. Future efforts should prioritize the development of standardized LLM evaluation frameworks tailored to health care settings. 
<xref ref-type="table" rid="table3">Table 3</xref> summarizes the challenges associated with applying LLMs in chronic disease management with proposed recommendations.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Key challenges and recommendations.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Challenge</td><td align="left" valign="bottom">Key observations</td><td align="left" valign="bottom">Recommendation</td></tr></thead><tbody><tr><td align="left" valign="bottom">Inaccurate and inconsistent responses</td><td align="left" valign="bottom">Hallucinations, inconsistent responses, outdated knowledge</td><td align="left" valign="bottom">Adopt RAG frameworks, fine-tuning, prompt engineering, and RL; use expert validation of LLM-generated recommendations</td></tr><tr><td align="left" valign="top">Limited datasets</td><td align="left" valign="top">Scarcity of datasets, missing data, and dataset imbalances.</td><td align="left" valign="top">Use synthetic data, data augmentation, partnerships with health care institutions, knowledge distillation</td></tr><tr><td align="left" valign="top">Computational demands</td><td align="left" valign="top">High computational demands</td><td align="left" valign="top">Adopt PEFT (LoRA and QLoRA) and quantization; use lightweight models for mobile devices</td></tr><tr><td align="left" valign="top">Ethical and privacy concerns</td><td align="left" valign="top">Privacy concerns, language and cultural barriers, and lack of regulatory oversight</td><td align="left" valign="top">Develop a regulatory framework</td></tr><tr><td align="left" valign="top">Usability issues</td><td align="left" valign="top">Restriction to textual inputs, lack of empathy, ineffectiveness in emergencies, lack of age-appropriate interaction</td><td align="left" valign="top">Use multimodal LLMs. 
Provide dynamic interfaces to accommodate low-digital-literacy users and age-appropriate interaction modes, customize to different cultural settings, and integrate with wearable devices and mobile health apps</td></tr><tr><td align="left" valign="top">Evaluation challenges</td><td align="left" valign="top">Predictive performance metrics, subjectivity, and interrater variability</td><td align="left" valign="top">Develop a standardized LLM evaluation framework for health care.</td></tr></tbody></table></table-wrap></sec><sec id="s4-13"><title>Limitations of the Study</title><p>Although quality assessment was conducted using MMAT, studies were not excluded based on their methodological rigor. As a result, including moderate and low-quality studies may have influenced the reliability and consistency of the review&#x2019;s findings. The varied methodological designs across studies may have affected the interpretation of results and conclusions drawn. Furthermore, the review was limited to only English-language publications, which may have introduced language bias and limited the generalizability of our findings, particularly in contexts where LLMs are being adapted to local languages or integrated into culturally specific health care practices. The exclusion of databases such as Embase and Web of Science may have limited the comprehensiveness of the search. Future research can consider broader database coverage and non-English sources to enhance diversity and scope.</p></sec><sec id="s4-14"><title>Implications for Practice and Future Research</title><p>This scoping review reveals several critical implications for the integration of LLMs in chronic disease management. First, it is essential to address the accuracy issues identified in 18 of 29 studies (62%). 
This calls for both technical solutions (domain-specific fine-tuning, reinforcement learning (RL), and retrieval-augmented generation (RAG) frameworks [<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref64">64</xref>]) and nontechnical solutions (expert validation and collaborative partnerships with health care institutions to access and use real-world clinical data). Second, enhancing accessibility across diverse patient populations requires developing culturally adapted LLM interfaces, implementing age-appropriate interaction modes [<xref ref-type="bibr" rid="ref40">40</xref>], and integrating with low-resource platforms such as SMS-based systems and lightweight mobile apps for various populations [<xref ref-type="bibr" rid="ref76">76</xref>]. Third, robust governance frameworks must be established to address the ethical, legal, and privacy concerns noted in 10 of 29 studies (35%) and to ensure regulatory compliance.</p><p>Finally, future research should focus on multimodal LLMs that can synthesize diverse data inputs from wearable devices and mobile health applications for holistic patient monitoring [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] and on developing resource-efficient deployment techniques. The predominance of diabetes-focused studies (14/29, 48%) highlights a potential research gap in addressing other prevalent chronic conditions. Similarly, there is a research gap in age-inclusive LLM design, given the overwhelming focus on adult populations (28/29, 96%) and the lack of pediatric studies. 
Addressing these gaps would enhance the clinical relevance and equitable application of LLMs across the full spectrum of chronic disease management.</p></sec><sec id="s4-15"><title>Conclusion</title><p>This scoping review highlights the growing potential of LLMs in supporting chronic disease management through patient education, diagnosis and treatment, emotional support, self-management support, decision support, and prediction tasks. While LLMs offer promising capabilities, their effective integration into health care still requires addressing key challenges related to accuracy, accessibility, usability, and ethical and privacy concerns. Future research should focus on integrating LLMs with mobile and wearable technologies, creating culturally and age-appropriate interfaces, and exploring integration with low-resource platforms. Addressing these research gaps will ensure equitable and safe use of LLMs across diverse health care settings.</p></sec></sec></body><back><ack><p>The authors acknowledge the valuable inputs of the reviewers and the editor, and the support of Mr. Bazekketa Datson of Ndejje University, Faculty of Science and Computing, for illustrating the figures used in this scoping review. The authors did not receive any funding for this review study. During the preparation of this work, the authors used Grammarly to improve the readability and language of the paper. After using this tool, the authors reviewed and edited the content as needed and took full responsibility for the content of the publication.</p></ack><fn-group><fn fn-type="con"><p>SHM contributed to conceptualization, methodology, data extraction and analysis, writing-original draft, and writing-review and editing. OJ managed conceptualization, methodology, data extraction and analysis, and writing-review and editing. HKN handled data extraction and analysis and review and editing. 
CGO, PS, PW, and NK contributed to review and critical input.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb2">PRISMA-ScR</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews</p></def></def-item><def-item><term id="abb3">RAG</term><def><p>retrieval-augmented generation</p></def></def-item><def-item><term id="abb4">RL</term><def><p>reinforcement learning</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bardhan</surname><given-names>I</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Karahanna</surname><given-names>E</given-names> </name></person-group><article-title>Connecting systems, data, and people: a multidisciplinary research roadmap for chronic disease management</article-title><source>MIS Q</source><year>2020</year><month>03</month><day>1</day><volume>44</volume><issue>1</issue><fpage>185</fpage><lpage>200</lpage><pub-id pub-id-type="doi">10.25300/MISQ/2020/14644</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hacker</surname><given-names>K</given-names> </name></person-group><article-title>The burden of chronic disease</article-title><source>Mayo Clinic Proceedings: Innovations, Quality &#x0026; Outcomes</source><year>2024</year><month>02</month><volume>8</volume><issue>1</issue><fpage>112</fpage><lpage>119</lpage><pub-id pub-id-type="doi">10.1016/j.mayocpiqo.2023.08.005</pub-id></nlm-citation></ref><ref 
id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holmen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Larsen</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Sallinen</surname><given-names>MH</given-names> </name><etal/></person-group><article-title>Working with patients suffering from chronic diseases can be a balancing act for health care professionals - a meta-synthesis of qualitative studies</article-title><source>BMC Health Serv Res</source><year>2020</year><month>02</month><day>10</day><volume>20</volume><issue>1</issue><fpage>98</fpage><pub-id pub-id-type="doi">10.1186/s12913-019-4826-2</pub-id><pub-id pub-id-type="medline">32039723</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xie</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Integration of artificial intelligence, blockchain, and wearable technology for chronic disease management: a new paradigm in smart healthcare</article-title><source>CURR MED SCI</source><year>2021</year><month>12</month><volume>41</volume><issue>6</issue><fpage>1123</fpage><lpage>1133</lpage><pub-id pub-id-type="doi">10.1007/s11596-021-2485-0</pub-id><pub-id pub-id-type="medline">34950987</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jim&#x00E9;nez-Mu&#x00F1;oz</surname><given-names>L</given-names> </name><name name-style="western"><surname>Guti&#x00E9;rrez-Rojas</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Porras-Segovia</surname><given-names>A</given-names> </name><name name-style="western"><surname>Courtet</surname><given-names>P</given-names> </name><name name-style="western"><surname>Baca-Garc&#x00ED;a</surname><given-names>E</given-names> </name></person-group><article-title>Mobile applications for the management of chronic physical conditions: a systematic review</article-title><source>Intern Med J</source><year>2022</year><month>01</month><volume>52</volume><issue>1</issue><fpage>21</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1111/imj.15081</pub-id><pub-id pub-id-type="medline">33012045</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Montagna</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ferretti</surname><given-names>S</given-names> </name><name name-style="western"><surname>Klopfenstein</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Florio</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pengo</surname><given-names>MF</given-names> </name></person-group><article-title>Data decentralisation of LLM-based chatbot systems in chronic disease self-management</article-title><source>GoodIT &#x2019;23: Proceedings of the 2023 ACM Conference on Information Technology for Social Good</source><year>2023</year><month>09</month><day>6</day><fpage>205</fpage><lpage>212</lpage><pub-id pub-id-type="doi">10.1145/3582515.3609536</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Haque</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chowdhury</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Soliman</surname><given-names>H</given-names> </name></person-group><article-title>Transforming chronic disease management with chatbots: key use cases for personalized and cost-effective care</article-title><year>2023</year><conf-name>2023 Sixth International Symposium on Computer, Consumer and Control (IS3C)</conf-name><conf-date>Jun 30 to Jul 3, 2023</conf-date><conf-loc>Taichung, Taiwan</conf-loc><fpage>367</fpage><lpage>370</lpage><pub-id pub-id-type="doi">10.1109/IS3C57901.2023.00104</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reddy</surname><given-names>S</given-names> </name></person-group><article-title>Evaluating large language models for use in healthcare: a framework for translational value assessment</article-title><source>Informatics in Medicine Unlocked</source><year>2023</year><volume>41</volume><issue>May</issue><fpage>101304</fpage><pub-id pub-id-type="doi">10.1016/j.imu.2023.101304</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Lian</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lei</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lian</surname><given-names>D</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>X</given-names> </name></person-group><article-title>Recommender AI agent: integrating large language models for interactive recommendations</article-title><source>arXiv</source><access-date>2025-08-24</access-date><comment>Preprint posted online on  Jan 30, 2024</comment><comment><ext-link 
ext-link-type="uri" xlink:href="https://arxiv.org/abs/2308.16505">https://arxiv.org/abs/2308.16505</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Snoswell</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Snoswell</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Kelly</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Caffery</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>AC</given-names> </name></person-group><article-title>Artificial intelligence: augmenting telehealth with large language models</article-title><source>J Telemed Telecare</source><year>2025</year><month>01</month><volume>31</volume><issue>1</issue><fpage>150</fpage><lpage>154</lpage><pub-id pub-id-type="doi">10.1177/1357633X231169055</pub-id><pub-id pub-id-type="medline">37041736</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Khan</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Mohammad</surname><given-names>N</given-names> </name></person-group><article-title>A review on large language models: architectures, applications, taxonomies, open issues and challenges</article-title><source>TechRxiv</source><comment>Preprint posted online on  Sep 27, 2023</comment><pub-id pub-id-type="doi">10.36227/techrxiv.24171183.v1</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>TF</given-names> </name><name 
name-style="western"><surname>Lu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSW</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>N</given-names> </name></person-group><article-title>Large language models in health care: development, applications, and challenges</article-title><source>Health Care Sci</source><year>2023</year><month>08</month><volume>2</volume><issue>4</issue><fpage>255</fpage><lpage>263</lpage><pub-id pub-id-type="doi">10.1002/hcs2.61</pub-id><pub-id pub-id-type="medline">38939520</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sharma</surname><given-names>P</given-names> </name><name name-style="western"><surname>Parasa</surname><given-names>S</given-names> </name></person-group><article-title>ChatGPT and large language models in gastroenterology</article-title><source>Nat Rev Gastroenterol Hepatol</source><year>2023</year><month>08</month><volume>20</volume><issue>8</issue><fpage>481</fpage><lpage>482</lpage><pub-id pub-id-type="doi">10.1038/s41575-023-00799-8</pub-id><pub-id pub-id-type="medline">37253794</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karabacak</surname><given-names>M</given-names> </name><name name-style="western"><surname>Margetis</surname><given-names>K</given-names> </name></person-group><article-title>Embracing large language models for medical applications: opportunities and challenges</article-title><source>Cureus</source><year>2023</year><month>05</month><volume>15</volume><issue>5</issue><fpage>e39305</fpage><pub-id pub-id-type="doi">10.7759/cureus.39305</pub-id><pub-id 
pub-id-type="medline">37378099</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Latif</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>J</given-names> </name></person-group><article-title>Evaluation and analysis of large language models for clinical text augmentation and generation</article-title><source>IEEE Access</source><year>2024</year><month>04</month><volume>12</volume><fpage>1</fpage><lpage>1</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2024.3384496</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name></person-group><article-title>Exploring the potential of large language models in personalized diabetes treatment strategies</article-title><source>In Review</source><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.21203/rs.3.rs-3995740/v1</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raghu</surname><given-names>K</given-names> </name><name name-style="western"><surname>S</surname><given-names>T</given-names> </name><name name-style="western"><surname>S Devishamani</surname><given-names>C</given-names> </name><name name-style="western"><surname>M</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rajalakshmi</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Raman</surname><given-names>R</given-names> </name></person-group><article-title>The utility of ChatGPT in diabetic retinopathy risk assessment: a comparative study with clinical diagnosis [response to letter]</article-title><source>Clin Ophthalmol</source><year>2024</year><volume>18</volume><fpage>313</fpage><lpage>314</lpage><pub-id pub-id-type="doi">10.2147/OPTH.S461186</pub-id><pub-id pub-id-type="medline">38317795</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Song</surname><given-names>I</given-names> </name><name name-style="western"><surname>Pendse</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>N</given-names> </name></person-group><article-title>The typing cure: experiences with large language model chatbots for mental health support</article-title><source>arXiv</source><comment>Preprint posted online on  May 9, 2025</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2401.14362">https://arxiv.org/abs/2401.14362</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Cao</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Large language models for cuffless blood pressure measurement from wearable biosignals</article-title><source>BCB &#x2019;24: Proceedings of the 15th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics</source><fpage>1</fpage><lpage>11</lpage><pub-id pub-id-type="doi">10.1145/3698587.3701447</pub-id></nlm-citation></ref><ref 
id="ref20"><label>20</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Ogundare</surname><given-names>O</given-names> </name><name name-style="western"><surname>Sofolahan</surname><given-names>S</given-names> </name></person-group><article-title>Large language models in ambulatory devices for home health diagnostics: a case study of sickle cell anemia management</article-title><source>arXiv</source><comment>Preprint posted online on  May 5, 2023</comment><pub-id pub-id-type="doi">10.1007/978-3-031-40971-4_42</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cankurtaran</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Polat</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Aydemir</surname><given-names>NG</given-names> </name><name name-style="western"><surname>Umay</surname><given-names>E</given-names> </name><name name-style="western"><surname>Yurekli</surname><given-names>OT</given-names> </name></person-group><article-title>Reliability and usefulness of ChatGPT for inflammatory bowel diseases: an analysis for patients and healthcare professionals</article-title><source>Cureus</source><year>2023</year><month>10</month><volume>15</volume><issue>10</issue><fpage>e46736</fpage><pub-id pub-id-type="doi">10.7759/cureus.46736</pub-id><pub-id pub-id-type="medline">38022227</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>K</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>C</given-names> </name></person-group><article-title>Knowledge-enhanced 
pre-training large language model for depression diagnosis and treatment</article-title><conf-name>2023 IEEE 9th International Conference on Cloud Computing and Intelligent Systems (CCIS)</conf-name><conf-date>Aug 12-13, 2023</conf-date><conf-loc>Dali, China</conf-loc><fpage>532</fpage><lpage>536</lpage><pub-id pub-id-type="doi">10.1109/CCIS59572.2023.10263217</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abdullahi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>R</given-names> </name><name name-style="western"><surname>Eickhoff</surname><given-names>C</given-names> </name></person-group><article-title>Learning to make rare and complex diagnoses with generative AI assistance: qualitative study of popular large language models</article-title><source>JMIR Med Educ</source><year>2024</year><month>02</month><day>13</day><volume>10</volume><fpage>e51391</fpage><pub-id pub-id-type="doi">10.2196/51391</pub-id><pub-id pub-id-type="medline">38349725</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al-Anezi</surname><given-names>FM</given-names> </name></person-group><article-title>Exploring the use of ChatGPT as a virtual health coach for chronic disease management</article-title><source>Learn Health Syst</source><year>2024</year><month>07</month><volume>8</volume><issue>3</issue><fpage>e10406</fpage><pub-id pub-id-type="doi">10.1002/lrh2.10406</pub-id><pub-id pub-id-type="medline">39036525</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tricco</surname><given-names>AC</given-names> </name><name 
name-style="western"><surname>Lillie</surname><given-names>E</given-names> </name><name name-style="western"><surname>Zarin</surname><given-names>W</given-names> </name><etal/></person-group><article-title>PRISMA extension for scoping reviews (PRISMA-ScR): checklist and explanation</article-title><source>Ann Intern Med</source><year>2018</year><month>10</month><day>2</day><volume>169</volume><issue>7</issue><fpage>467</fpage><lpage>473</lpage><pub-id pub-id-type="doi">10.7326/M18-0850</pub-id><pub-id pub-id-type="medline">30178033</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hong</surname><given-names>QN</given-names> </name><name name-style="western"><surname>F&#x00E0;bregues</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bartlett</surname><given-names>G</given-names> </name><etal/></person-group><article-title>The Mixed Methods Appraisal Tool (MMAT) version 2018 for information professionals and researchers</article-title><source>EFI</source><year>2018</year><volume>34</volume><issue>4</issue><fpage>285</fpage><lpage>291</lpage><pub-id pub-id-type="doi">10.3233/EFI-180221</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Athavale</surname><given-names>A</given-names> </name><name name-style="western"><surname>Baier</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ross</surname><given-names>E</given-names> </name><name name-style="western"><surname>Fukaya</surname><given-names>E</given-names> </name></person-group><article-title>The potential of chatbots in chronic venous disease patient management</article-title><source>JVS Vasc Insights</source><year>2023</year><volume>1</volume><fpage>100019</fpage><pub-id 
pub-id-type="doi">10.1016/j.jvsvi.2023.100019</pub-id><pub-id pub-id-type="medline">37701430</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Soto-Ch&#x00E1;vez</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Bustos</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Fern&#x00E1;ndez-&#x00C1;vila</surname><given-names>DG</given-names> </name><name name-style="western"><surname>Mu&#x00F1;oz</surname><given-names>OM</given-names> </name></person-group><article-title>Evaluation of information provided to patients by ChatGPT about chronic diseases in Spanish language</article-title><source>Digit Health</source><year>2024</year><volume>10</volume><fpage>20552076231224603</fpage><pub-id pub-id-type="doi">10.1177/20552076231224603</pub-id><pub-id pub-id-type="medline">38188865</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abbas</surname><given-names>S</given-names> </name><name name-style="western"><surname>Iftikhar</surname><given-names>M</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>SJ</given-names> </name></person-group><article-title>ChatGPT-assisted machine learning for chronic disease classification and prediction: a developmental and validation study</article-title><source>Cureus</source><year>2024</year><month>12</month><volume>16</volume><issue>12</issue><fpage>e75851</fpage><pub-id pub-id-type="doi">10.7759/cureus.75851</pub-id><pub-id pub-id-type="medline">39822450</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name 
name-style="western"><surname>Anderson</surname><given-names>P</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Davidson</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Bridging domains in chronic lower back pain: large language models and ontology-driven strategies for knowledge graph construction</article-title><source>bioRxiv</source><comment>Preprint posted online on  Mar 14, 2024</comment><pub-id pub-id-type="doi">10.1101/2024.03.11.584505</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Ding</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Thao</surname><given-names>PNM</given-names> </name><name name-style="western"><surname>Peng</surname><given-names>WC</given-names> </name><etal/></person-group><article-title>Large language multimodal models for 5-year chronic disease cohort prediction using EHR data</article-title><source>arXiv</source><comment>Preprint posted online on  Aug 29, 2024</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2403.04785">https://arxiv.org/abs/2403.04785</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jairoun</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Al-Hemyari</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Shahwan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Al-Qirim</surname><given-names>T</given-names> </name><name name-style="western"><surname>Shahwan</surname><given-names>M</given-names> </name></person-group><article-title>Benefit-risk assessment of ChatGPT 
applications in the field of diabetes and metabolic illnesses: a qualitative study</article-title><source>Clin Med Insights Endocrinol Diabetes</source><year>2024</year><volume>17</volume><fpage>11795514241235514</fpage><pub-id pub-id-type="doi">10.1177/11795514241235514</pub-id><pub-id pub-id-type="medline">38495947</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Mondal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Naskar</surname><given-names>A</given-names> </name></person-group><article-title>Artificial intelligence in diabetes care: evaluating GPT-4&#x2019;s competency in reviewing diabetic patient management plan in comparison to expert review</article-title><source>medRxiv</source><year>2024</year><pub-id pub-id-type="doi">10.1101/2024.04.12.24305732</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Few-shot learning for chronic disease management: leveraging large language models and multi-prompt engineering with medical knowledge injection</article-title><source>Proceedings of the 58th Hawaii International Conference on System Sciences</source><year>2025</year><pub-id pub-id-type="doi">10.24251/HICSS.2025.084</pub-id><pub-id pub-id-type="medline">38681743</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Liao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>WT</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>IH</given-names> </name><etal/></person-group><article-title>EHR-based mobile and web platform for chronic disease risk prediction using large language multimodal models</article-title><source>CIKM &#x2019;24: Proceedings of the 33rd ACM International Conference on Information and Knowledge Management</source><year>2024</year><month>10</month><day>21</day><fpage>5244</fpage><lpage>5248</lpage><pub-id pub-id-type="doi">10.1145/3627673.3679227</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ding</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Thao</surname><given-names>PNM</given-names> </name><name name-style="western"><surname>Peng</surname><given-names>WC</given-names> </name><etal/></person-group><article-title>Large language multimodal models for new-onset type 2 diabetes prediction using five-year cohort electronic health records</article-title><source>Sci Rep</source><year>2024</year><month>09</month><day>6</day><volume>14</volume><issue>1</issue><fpage>20774</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-71020-2</pub-id><pub-id pub-id-type="medline">39237580</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Dao</surname><given-names>D</given-names> </name><name name-style="western"><surname>Teo</surname><given-names>JYC</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>HD</given-names> 
</name></person-group><article-title>LLM-powered multimodal AI conversations for diabetes prevention</article-title><year>2024</year><month>06</month><day>10</day><access-date>2025-09-02</access-date><conf-name>ICMR &#x2019;24: International Conference on Multimedia Retrieval</conf-name><conf-loc>Phuket, Thailand</conf-loc><fpage>1</fpage><lpage>6</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/proceedings/10.1145/3643479">https://dl.acm.org/doi/proceedings/10.1145/3643479</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khan</surname><given-names>M</given-names> </name></person-group><article-title>Assessing the efficacy of ChatGPT in facilitating self-management strategies among diabetic patients</article-title><source>European Chemical Bulletin</source><year>2023</year><access-date>2025-09-02</access-date><volume>12</volume><issue>10</issue><fpage>10490</fpage><lpage>10502</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.researchgate.net/publication/373420033_Assessing_the_Efficacy_of_ChatGPT_in_Facilitating_Self-Management_Strategies_among_Diabetic_Patients_Section_A-Research_paper_10490_Eur">https://www.researchgate.net/publication/373420033_Assessing_the_Efficacy_of_ChatGPT_in_Facilitating_Self-Management_Strategies_among_Diabetic_Patients_Section_A-Research_paper_10490_Eur</ext-link></comment></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mondal</surname><given-names>H</given-names> </name><name name-style="western"><surname>Dash</surname><given-names>I</given-names> </name><name name-style="western"><surname>Mondal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Behera</surname><given-names>JK</given-names> 
</name></person-group><article-title>ChatGPT in answering queries related to lifestyle-related diseases and disorders</article-title><source>Cureus</source><year>2023</year><month>11</month><pub-id pub-id-type="doi">10.7759/cureus.48296</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Young</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Enichen</surname><given-names>E</given-names> </name><name name-style="western"><surname>Rao</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Pilot study of large language models as an age-appropriate explanatory tool for chronic pediatric conditions</article-title><source>medRxiv</source><comment>Preprint posted online on  Aug 7, 2024</comment><pub-id pub-id-type="doi">10.1101/2024.08.06.24311544</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><name name-style="western"><surname>Guan</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Integrated image-based deep learning and language models for primary diabetes care</article-title><source>Nat Med</source><year>2024</year><month>10</month><volume>30</volume><issue>10</issue><fpage>2886</fpage><lpage>2896</lpage><pub-id pub-id-type="doi">10.1038/s41591-024-03139-8</pub-id><pub-id pub-id-type="medline">39030266</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Ying</surname><given-names>Y</given-names> </name><name 
name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yuan</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Exploration of ChatGPT application in diabetes education based on multi-dataset</article-title><source>medRxiv</source><comment>Preprint posted online on  Sep 27, 2023</comment><pub-id pub-id-type="doi">10.1101/2023.09.27.23296144</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Guan</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Large language models for diabetes training: a prospective study</article-title><source>Sci Bull (Beijing)</source><year>2025</year><month>03</month><volume>70</volume><issue>6</issue><fpage>934</fpage><lpage>942</lpage><pub-id pub-id-type="doi">10.1016/j.scib.2025.01.034</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Hussain</surname><given-names>W</given-names> </name><name name-style="western"><surname>Grundy</surname><given-names>J</given-names> </name></person-group><article-title>Advice for diabetes self-management by ChatGPT models: challenges and recommendations</article-title><source>arXiv</source><comment>Preprint posted online on  Jan 14, 2025</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2501.07931">https://arxiv.org/abs/2501.07931</ext-link></comment></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Wang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ye</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Enhancement of the performance of large language models in diabetes education through retrieval-augmented generation: comparative study</article-title><source>J Med Internet Res</source><year>2024</year><month>11</month><day>8</day><volume>26</volume><fpage>e58041</fpage><pub-id pub-id-type="doi">10.2196/58041</pub-id><pub-id pub-id-type="medline">39046096</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eaton</surname><given-names>C</given-names> </name><name name-style="western"><surname>Vallejo</surname><given-names>N</given-names> </name><name name-style="western"><surname>McDonald</surname><given-names>X</given-names> </name><etal/></person-group><article-title>User engagement with mHealth interventions to promote treatment adherence and self-management in people with chronic health conditions: systematic review</article-title><source>J Med Internet Res</source><year>2024</year><month>09</month><day>24</day><volume>26</volume><fpage>e50508</fpage><pub-id pub-id-type="doi">10.2196/50508</pub-id><pub-id pub-id-type="medline">39316431</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheikh-Moussa</surname><given-names>K</given-names> </name><name name-style="western"><surname>Mira</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Orozco-Beltran</surname><given-names>D</given-names> </name></person-group><article-title>Improving engagement among patients with chronic cardiometabolic conditions using 
mHealth: critical review of reviews</article-title><source>JMIR Mhealth Uhealth</source><year>2020</year><month>04</month><day>8</day><volume>8</volume><issue>4</issue><fpage>e15446</fpage><pub-id pub-id-type="doi">10.2196/15446</pub-id><pub-id pub-id-type="medline">32267239</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mattison</surname><given-names>G</given-names> </name><name name-style="western"><surname>Canfell</surname><given-names>O</given-names> </name><name name-style="western"><surname>Forrester</surname><given-names>D</given-names> </name><etal/></person-group><article-title>The influence of wearables on health care outcomes in chronic disease: systematic review</article-title><source>J Med Internet Res</source><year>2022</year><month>07</month><day>1</day><volume>24</volume><issue>7</issue><fpage>e36690</fpage><pub-id pub-id-type="doi">10.2196/36690</pub-id><pub-id pub-id-type="medline">35776492</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Laranjo</surname><given-names>L</given-names> </name><name name-style="western"><surname>Dunn</surname><given-names>AG</given-names> </name><name name-style="western"><surname>Tong</surname><given-names>HL</given-names> </name><etal/></person-group><article-title>Conversational agents in healthcare: a systematic review</article-title><source>J Am Med Inform Assoc</source><year>2018</year><month>09</month><day>1</day><volume>25</volume><issue>9</issue><fpage>1248</fpage><lpage>1258</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocy072</pub-id><pub-id pub-id-type="medline">30010941</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name 
name-style="western"><surname>Lewis</surname><given-names>P</given-names> </name><name name-style="western"><surname>Perez</surname><given-names>E</given-names> </name><name name-style="western"><surname>Piktus</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Retrieval-augmented generation for knowledge-intensive NLP tasks</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 12, 2021</comment><pub-id pub-id-type="doi">10.48550/arXiv.2005.11401</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zakka</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chaurasia</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shad</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Almanac: retrieval-augmented language models for clinical medicine</article-title><source>Res Sq</source><year>2023</year><month>05</month><day>2</day><fpage>rs.3.rs-2883198</fpage><pub-id pub-id-type="doi">10.21203/rs.3.rs-2883198/v1</pub-id><pub-id pub-id-type="medline">37205549</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>X</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>W</given-names> </name></person-group><article-title>Augmenting black-box LLMs with medical textbooks for clinical question answering</article-title><source>arXiv</source><comment>Preprint posted online on  Feb 23, 2025</comment><pub-id pub-id-type="doi">10.48550/arXiv.2309.02233</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Xiong</surname><given-names>G</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>A</given-names> </name></person-group><article-title>Benchmarking retrieval-augmented generation for medicine</article-title><source>Findings of the Association for Computational Linguistics ACL 2024</source><year>2024</year><fpage>6233</fpage><lpage>6251</lpage><pub-id pub-id-type="doi">10.18653/v1/2024.findings-acl.372</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name><name name-style="western"><surname>Moon</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Purkayastha</surname><given-names>S</given-names> </name><name name-style="western"><surname>Celi</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Trivedi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gichoya</surname><given-names>JW</given-names> </name></person-group><article-title>Ethics of large language models in medicine and medical research</article-title><source>Lancet Digit Health</source><year>2023</year><month>06</month><volume>5</volume><issue>6</issue><fpage>e333</fpage><lpage>e335</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(23)00083-3</pub-id><pub-id pub-id-type="medline">37120418</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Navigli</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Conia</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ross</surname><given-names>B</given-names> </name></person-group><article-title>Biases in large language models: origins, inventory, and discussion</article-title><source>J Data and Information Quality</source><year>2023</year><month>06</month><day>30</day><volume>15</volume><issue>2</issue><fpage>1</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1145/3597307</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Liang</surname><given-names>PP</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Morency</surname><given-names>LP</given-names> </name><name name-style="western"><surname>Salakhutdinov</surname><given-names>R</given-names> </name></person-group><article-title>Towards understanding and mitigating social biases in language models</article-title><source>arXiv</source><comment>Preprint posted online on  Jun 24, 2021</comment><pub-id pub-id-type="doi">10.48550/arXiv.2106.13219</pub-id><pub-id pub-id-type="medline">34545335</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Verma</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ernst</surname><given-names>M</given-names> </name><name name-style="western"><surname>Just</surname><given-names>R</given-names> </name></person-group><article-title>Removing biased data to improve fairness and accuracy</article-title><source>arXiv</source><comment>Preprint posted online on  Feb 5, 2021</comment><pub-id pub-id-type="doi">10.48550/arXiv.2102.03054</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation 
citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>J</given-names> </name><name name-style="western"><surname>Choo</surname><given-names>J</given-names> </name></person-group><article-title>BiaSwap: removing dataset bias with bias-tailored swapping augmentation</article-title><year>2021</year><conf-name>2021 IEEE/CVF International Conference on Computer Vision (ICCV)</conf-name><conf-date>Oct 10-17, 2021</conf-date><conf-loc>Montreal, QC, Canada</conf-loc><fpage>14972</fpage><lpage>14981</lpage><pub-id pub-id-type="doi">10.1109/ICCV48922.2021.01472</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ernst</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Marton</surname><given-names>S</given-names> </name><name name-style="western"><surname>Brinkmann</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Bias mitigation for large language models using adversarial learning</article-title><year>2023</year><access-date>2025-08-24</access-date><conf-name>CEUR Workshop on Fairness and Bias in AI</conf-name><conf-loc>Krakow, Poland</conf-loc><comment><ext-link ext-link-type="uri" xlink:href="https://ceur-ws.org/Vol-3523/paper11.pdf">https://ceur-ws.org/Vol-3523/paper11.pdf</ext-link></comment></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Dan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name></person-group><article-title>ChatDoctor: a medical chat model fine-tuned on a Large Language Model Meta-AI (LLaMA) using medical domain knowledge</article-title><source>Cureus</source><year>2023</year><month>06</month><volume>15</volume><issue>6</issue><fpage>e40895</fpage><pub-id pub-id-type="doi">10.7759/cureus.40895</pub-id><pub-id pub-id-type="medline">37492832</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alghanmi</surname><given-names>I</given-names> </name><name name-style="western"><surname>Espinosa-Anke</surname><given-names>L</given-names> </name><name name-style="western"><surname>Schockaert</surname><given-names>S</given-names> </name></person-group><article-title>Self-supervised intermediate fine-tuning of biomedical language models for interpreting patient case descriptions</article-title><source>Proceedings of the 29th International Conference on Computational Linguistics</source><year>2022</year><access-date>2025-09-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2022.coling-1.123/">https://aclanthology.org/2022.coling-1.123/</ext-link></comment></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heston</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Khun</surname><given-names>C</given-names> </name></person-group><article-title>Prompt engineering in medical 
education</article-title><source>IME</source><year>2023</year><volume>2</volume><issue>3</issue><fpage>198</fpage><lpage>205</lpage><pub-id pub-id-type="doi">10.3390/ime2030019</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mesk&#x00F3;</surname><given-names>B</given-names> </name></person-group><article-title>Prompt engineering as an important emerging skill for medical professionals: tutorial</article-title><source>J Med Internet Res</source><year>2023</year><month>10</month><day>4</day><volume>25</volume><fpage>e50638</fpage><pub-id pub-id-type="doi">10.2196/50638</pub-id><pub-id pub-id-type="medline">37792434</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>E</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Prompt engineering for healthcare: methodologies and applications</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 23, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2304.14670</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Sarker</surname><given-names>S</given-names> </name><name name-style="western"><surname>Qian</surname><given-names>L</given-names> </name><name name-style="western"><surname>Dong</surname><given-names>X</given-names> </name></person-group><article-title>Medical data augmentation via ChatGPT: a case study on medication identification and medication event classification</article-title><source>arXiv</source><comment>Preprint 
posted online on  Jun 10, 2023</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2306.07297">https://arxiv.org/abs/2306.07297</ext-link></comment></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Survey on knowledge distillation for large language models: methods, evaluation, and application</article-title><source>ACM Trans Intell Syst Technol</source><year>2024</year><pub-id pub-id-type="doi">10.1145/3699518</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Liao</surname><given-names>B</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Monz</surname><given-names>C</given-names> </name></person-group><article-title>Parameter-efficient fine-tuning without introducing new latency</article-title><year>2023</year><conf-name>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name><fpage>4242</fpage><lpage>4260</lpage><pub-id pub-id-type="doi">10.18653/v1/2023.acl-long.233</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ding</surname><given-names>N</given-names> </name><name name-style="western"><surname>Qin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>G</given-names> 
</name><etal/></person-group><article-title>Parameter-efficient fine-tuning of large-scale pre-trained language models</article-title><source>Nat Mach Intell</source><year>2023</year><volume>5</volume><issue>3</issue><fpage>220</fpage><lpage>235</lpage><pub-id pub-id-type="doi">10.1038/s42256-023-00626-4</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Tam</surname><given-names>D</given-names> </name><name name-style="western"><surname>Muqeeth</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Few-shot parameter-efficient fine-tuning is better and cheaper than in-context learning</article-title><source>arXiv</source><comment>Preprint posted online on  Aug 26, 2022</comment><pub-id pub-id-type="doi">10.48550/arXiv.2205.05638</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Hu</surname><given-names>E</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wallis</surname><given-names>P</given-names> </name><etal/></person-group><article-title>LoRA: low-rank adaptation of large language models</article-title><source>arXiv</source><comment>Preprint posted online on  Oct 16, 2021</comment><pub-id pub-id-type="doi">10.48550/arXiv.2106.09685</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Dettmers</surname><given-names>T</given-names> </name><name name-style="western"><surname>Pagnoni</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Holtzman</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zettlemoyer</surname><given-names>L</given-names> </name></person-group><article-title>QLoRA: efficient finetuning of quantized LLMs</article-title><source>arXiv</source><comment>Preprint posted online on  May 23, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2305.14314</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>SY</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>CY</given-names> </name><name name-style="western"><surname>Yin</surname><given-names>H</given-names> </name><etal/></person-group><article-title>DoRA: weight-decomposed low-rank adaptation</article-title><source>arXiv</source><comment>Preprint posted online on  Jul 9, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2402.09353</pub-id></nlm-citation></ref><ref id="ref73"><label>73</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Arora</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>ReFT: representation finetuning for language models</article-title><source>arXiv</source><comment>Preprint posted online on  May 22, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2404.03592</pub-id></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Houlsby</surname><given-names>N</given-names> </name><name name-style="western"><surname>Giurgiu</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Jastrzebski</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Parameter-efficient transfer learning for NLP</article-title><source>arXiv</source><comment>Preprint posted online on Jun 13, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1902.00751</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>XL</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>P</given-names> </name></person-group><article-title>Prefix-tuning: optimizing continuous prompts for generation</article-title><source>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1)</source><comment>Preprint posted online on Jan, 2021</comment><pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.353</pub-id></nlm-citation></ref><ref id="ref76"><label>76</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Dang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kostakos</surname><given-names>V</given-names> </name><name name-style="western"><surname>Jia</surname><given-names>H</given-names> </name></person-group><article-title>Efficient and personalized mobile health event prediction via small language models</article-title><year>2024</year><month>12</month><day>4</day><conf-name>Proceedings of the 30th Annual International Conference on Mobile Computing and Networking (MobiCom &#x2019;24)</conf-name><fpage>2353</fpage><lpage>2358</lpage><pub-id pub-id-type="doi">10.1145/3636534.3698123</pub-id></nlm-citation></ref><ref id="ref77"><label>77</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mesk&#x00F3;</surname><given-names>B</given-names> </name></person-group><article-title>The impact of multimodal large language models on health care&#x2019;s future</article-title><source>J Med Internet Res</source><year>2023</year><month>11</month><day>2</day><volume>25</volume><fpage>e52865</fpage><pub-id pub-id-type="doi">10.2196/52865</pub-id><pub-id pub-id-type="medline">37917126</pub-id></nlm-citation></ref><ref id="ref78"><label>78</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Belyaeva</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cosentino</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hormozdiari</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Multimodal LLMs for health grounded in individual-specific data</article-title><source>arXiv</source><comment>Preprint posted online on Jul 20, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2307.09018</pub-id></nlm-citation></ref><ref id="ref79"><label>79</label><nlm-citation citation-type="other"><person-group person-group-type="author"><collab>Gemini Team Google</collab></person-group><article-title>Gemini: a family of highly capable multimodal models</article-title><source>arXiv</source><comment>Preprint posted online on May 9, 2025</comment><pub-id pub-id-type="doi">10.48550/arXiv.2312.11805</pub-id></nlm-citation></ref><ref id="ref80"><label>80</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ali</surname><given-names>H</given-names> </name><name name-style="western"><surname>Qadir</surname><given-names>J</given-names> </name><name name-style="western"><surname>Alam</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>Househ</surname><given-names>M</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>Z</given-names> </name></person-group><article-title>ChatGPT and large language models in healthcare: opportunities and risks</article-title><year>2023</year><conf-name>IEEE International Conference on Artificial Intelligence, Blockchain, and Internet of Things (AIBThings)</conf-name><pub-id pub-id-type="doi">10.1109/AIBThings58340.2023.10291020</pub-id></nlm-citation></ref><ref id="ref81"><label>81</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nasir</surname><given-names>S</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Bai</surname><given-names>S</given-names> </name></person-group><article-title>Ethical framework for harnessing the power of AI in healthcare and beyond</article-title><source>IEEE Access</source><year>2024</year><volume>12</volume><fpage>31014</fpage><lpage>31035</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2024.3369912</pub-id></nlm-citation></ref><ref id="ref82"><label>82</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goirand</surname><given-names>M</given-names> </name><name name-style="western"><surname>Austin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Clay-Williams</surname><given-names>R</given-names> </name></person-group><article-title>Implementing ethics in healthcare AI-based applications: a scoping review</article-title><source>Sci Eng Ethics</source><year>2021</year><month>09</month><day>3</day><volume>27</volume><issue>5</issue><fpage>61</fpage><pub-id pub-id-type="doi">10.1007/s11948-021-00336-3</pub-id><pub-id pub-id-type="medline">34480239</pub-id></nlm-citation></ref><ref 
id="ref83"><label>83</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mesk&#x00F3;</surname><given-names>B</given-names> </name><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>The imperative for regulatory oversight of large language models (or generative AI) in healthcare</article-title><source>NPJ Digit Med</source><year>2023</year><month>07</month><day>6</day><volume>6</volume><issue>1</issue><fpage>120</fpage><pub-id pub-id-type="doi">10.1038/s41746-023-00873-0</pub-id><pub-id pub-id-type="medline">37414860</pub-id></nlm-citation></ref><ref id="ref84"><label>84</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lareyre</surname><given-names>F</given-names> </name><name name-style="western"><surname>Raffort</surname><given-names>J</given-names> </name></person-group><article-title>Ethical concerns regarding the use of large language models in healthcare</article-title><source>EJVES Vasc Forum</source><year>2024</year><volume>61</volume><fpage>1</fpage><pub-id pub-id-type="doi">10.1016/j.ejvsvf.2023.10.003</pub-id><pub-id pub-id-type="medline">38025830</pub-id></nlm-citation></ref><ref id="ref85"><label>85</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reddy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rogers</surname><given-names>W</given-names> </name><name name-style="western"><surname>Makinen</surname><given-names>VP</given-names> </name><etal/></person-group><article-title>Evaluation framework to guide implementation of AI systems into healthcare settings</article-title><source>BMJ Health Care Inform</source><year>2021</year><month>10</month><volume>28</volume><issue>1</issue><fpage>1</fpage><lpage>7</lpage><pub-id 
pub-id-type="doi">10.1136/bmjhci-2021-100444</pub-id><pub-id pub-id-type="medline">34642177</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Detailed methodological quality appraisal results for each included study using the Mixed Methods Appraisal Tool (MMAT).</p><media xlink:href="medinform_v13i1e66905_app1.docx" xlink:title="DOCX File, 78 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>PRISMA-ScR checklist.</p><media xlink:href="medinform_v13i1e66905_app2.pdf" xlink:title="PDF File, 154 KB"/></supplementary-material></app-group></back></article>