<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e86965</article-id><article-id pub-id-type="doi">10.2196/86965</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Advancing Alzheimer Disease Prediction With Large Language Model&#x2013;Based Linguistic Feature Analysis: Development and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Hsu</surname><given-names>Ming-Hsia</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hwang</surname><given-names>San-Yih</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tsai</surname><given-names>Yi-Hang</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Chang</surname><given-names>Yun-Chi</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Liang</surname><given-names>Chih-Kuang</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chang</surname><given-names>Chiung-Yun</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Information Management, National Sun Yat-sen University</institution><addr-line>No. 70, Lienhai Rd</addr-line><addr-line>Kaohsiung</addr-line><country>Taiwan</country></aff><aff id="aff2"><institution>Department of Information Systems, Kaohsiung Municipal United Hospital</institution><addr-line>Kaohsiung</addr-line><country>Taiwan</country></aff><aff id="aff3"><institution>Division of Neurology, Kaohsiung Veterans General Hospital</institution><addr-line>Kaohsiung</addr-line><country>Taiwan</country></aff><aff id="aff4"><institution>Center for Geriatrics and Gerontology, Kaohsiung Veterans General Hospital</institution><addr-line>Kaohsiung</addr-line><country>Taiwan</country></aff><aff id="aff5"><institution>Center for Healthy Longevity and Aging Sciences, National Yang Ming Chiao Tung University</institution><addr-line>Taipei</addr-line><country>Taiwan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sen</surname><given-names>Anando</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Baladhandapani</surname><given-names>Arunadevi</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Matos</surname><given-names>David 
Manuel Martins de</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Shah</surname><given-names>Namra Bhadreshkumar</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to San-Yih Hwang, PhD, Department of Information Management, National Sun Yat-sen University, No. 70, Lienhai Rd, Kaohsiung, 804201, Taiwan, +886-7-5252000 ext 4723; <email>syhwang@mis.nsysu.edu.tw</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>28</day><month>5</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e86965</elocation-id><history><date date-type="received"><day>02</day><month>11</month><year>2025</year></date><date date-type="rev-recd"><day>20</day><month>04</month><year>2026</year></date><date date-type="accepted"><day>21</day><month>04</month><year>2026</year></date></history><copyright-statement>&#x00A9; Ming-Hsia Hsu, San-Yih Hwang, Yi-Hang Tsai, Yun-Chi Chang, Chih-Kuang Liang, Chiung-Yun Chang. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 28.5.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e86965"/><abstract><sec><title>Background</title><p>Alzheimer disease (AD) is a progressive neurodegenerative disorder with rapidly growing global prevalence. Early detection is critical for timely intervention; yet, conventional diagnostic methods remain costly and invasive. Speech-based assessment has emerged as a noninvasive alternative, as AD characteristically impairs linguistic abilities including fluency, coherence, and informational content. Recent advances in large language models (LLMs) offer new opportunities to extract structured linguistic features from transcribed speech for automated AD classification. However, existing LLM-based approaches often lack transparency and clinical interpretability, limiting their adoption in clinical workflows.</p></sec><sec><title>Objective</title><p>This study aims to investigate the influence of linguistic features extracted from transcribed speech, as analyzed by LLMs, on the accuracy and interpretability of AD prediction.</p></sec><sec sec-type="methods"><title>Methods</title><p>We propose a framework that leverages LLMs to analyze linguistic features extracted from transcribed speech for AD classification. Our approach focuses on 4 key aspects, including readability, fluency, richness of detail, and keyword relevance. To enhance classification accuracy, the framework integrates transcript embeddings with feature explanation embeddings, forming a comprehensive linguistic representation. 
We conducted extensive ablation studies to evaluate the contributions of individual features and benchmarked our framework against existing LLM-driven methodologies through pairwise explainability evaluations. Output stability was assessed across 3 independent pipeline runs. A fully local configuration (Llama 3 8B + nomic-embed-text) was tested to evaluate privacy-preserving deployment feasibility. Explainability was assessed via LLM-based pairwise comparison (Gemini-3.1-flash-lite) against the method of Bang et al across 54 correctly classified cases and by blinded evaluation from 2 neurologists.</p></sec><sec sec-type="results"><title>Results</title><p>The proposed framework achieved a mean precision of 91.52%, a sensitivity of 91.08%, a specificity of 96.29%, and an <italic>F</italic><sub>1</sub>-score of 91.05% across 3 independent runs on the ADReSSo 2021 dataset, outperforming existing LLM-based approaches. A fully local configuration (Llama 3 8B + nomic-embed-text, requiring no cloud application programming interface access) achieved an <italic>F</italic><sub>1</sub>-score of 81.58%, demonstrating framework transferability to privacy-preserving deployment environments. Keyword relevance was the most influential feature (<italic>F</italic><sub>1</sub>-score drop of 13.22 percentage points when removed). Explainability evaluations showed our method was preferred in 49 out of 54 cases via Gemini-3.1-flash-lite, with human experts preferring our method in 89 of 108 blinded assessments.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>These findings highlight that a structured linguistic feature analysis using LLMs provides a robust and interpretable framework for preliminary AD detection. 
Our approach offers a scalable and accessible solution that bridges artificial intelligence&#x2013;driven text analysis with clinical applications, supporting early detection of cognitive decline through noninvasive assessment methods.</p></sec></abstract><kwd-group><kwd>alzheimer disease</kwd><kwd>large language models</kwd><kwd>linguistic features</kwd><kwd>prompt engineering</kwd><kwd>early detection</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Alzheimer disease (AD) is a progressive neurodegenerative disorder that primarily affects older adults. The global prevalence of AD is significant, as the number of people living with dementia is projected to increase from 55 million in 2019 to 139 million by 2050, according to the 2023 World Health Organization (WHO) report [<xref ref-type="bibr" rid="ref1">1</xref>]. In the United States alone, as of 2024, approximately 6.9 million individuals aged 65 years and older are affected by Alzheimer dementia [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. These alarming figures highlight the critical need for early detection and intervention strategies to improve patient outcomes, optimize treatment strategies, and reduce the burden on health care systems [<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>Early diagnosis of dementia offers substantial benefits, enabling individuals to make informed decisions, access essential services and treatments, and implement preventive measures to maintain safety and quality of life [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. 
Patients, caregivers, and researchers highly value early detection due to its role in supporting life planning, improving end-of-life care, and facilitating research on disease pathology before advanced neuronal damage occurs [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Furthermore, early diagnosis may promote lifestyle modifications that could potentially delay or prevent the onset of AD, offering a proactive approach to health management [<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Although neuroimaging and cerebrospinal fluid biomarkers are considered the gold standard for early detection, their limitations&#x2014;including high costs, limited availability, and invasive procedures&#x2014;create accessibility challenges, particularly in resource-limited settings [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Plasma biomarkers are emerging as promising, noninvasive alternatives that could improve accessibility and reduce wait times for specialized care [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. However, they also face limitations, including variability in results, lower specificity compared to traditional biomarkers, and standardization challenges [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. 
Additionally, dementia risk scores, such as Cardiovascular Risk Factors, Aging, and Dementia (CAIDE) and the Australian National University Alzheimer Disease Risk Index (ANU-ADRI), provide valuable support for early identification, though ongoing refinement is required for broader clinical application [<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>Integrating routine cognitive assessments in primary care can further enhance early detection rates, promoting proactive dementia management and improved patient outcomes [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. Recent technological developments, including computerized adaptive testing (CAT), enhance the flexibility of digital assessments by tailoring testing experiences based on individual cognitive capabilities [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Additionally, digital cognitive assessments enable the discovery and longitudinal monitoring of novel digital biomarkers outside traditional clinical settings [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. These tools offer significant advantages over traditional paper-based methods in both clinical practice and research, demonstrating increased sensitivity in detecting subtle cognitive changes that conventional assessments might overlook [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. 
Digital assessments also provide greater reliability and validity through repeated measurements across multiple days, yielding more consistent intra- and interparticipant data compared to single-time-point traditional assessments [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</p><p>The accessibility of digital cognitive assessments is another key advantage, as they allow remote self-administration, reducing the need for in-person clinic visits and improving access for underserved populations [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Additionally, these digital methods offer high ecological validity by enabling frequent, brief assessments in real-world settings, supporting a patient-centered approach, and generating data that better reflects daily cognitive functioning [<xref ref-type="bibr" rid="ref20">20</xref>]. Their cost-effectiveness and efficiency make them particularly suitable for large-scale studies and clinical trials requiring scalable solutions [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref22">22</xref>].</p><p>AD is increasingly recognized for its impact on cognitive function, particularly memory impairment as an early hallmark. However, language impairment is also a notable early symptom that significantly hinders communication [<xref ref-type="bibr" rid="ref23">23</xref>]. Studies indicate that integrating language assessments into conventional cognitive evaluations enhances the precision of AD progression prediction [<xref ref-type="bibr" rid="ref24">24</xref>]. As cognitive decline advances, linguistic tasks become more challenging, highlighting the importance of understanding language dynamics in this patient population. 
Moreover, the distinct relationship between language deficits and cognitive deterioration underscores the potential of linguistic measures as critical markers for assessing the progression from mild cognitive impairment (MCI) to AD [<xref ref-type="bibr" rid="ref25">25</xref>]. Recent findings suggest that language assessments can not only identify individuals at greater risk for developing AD but also aid in monitoring the severity of language impairments as the disease progresses [<xref ref-type="bibr" rid="ref26">26</xref>]. Incorporating language assessment into routine cognitive evaluations could improve early detection strategies, enabling targeted interventions designed to help maintain communication abilities. Thus, recognizing and addressing language impairments in early AD stages could play a pivotal role in patient care and management.</p><p>In recent years, deep learning-based approaches have gained traction for automating feature extraction by learning complex representations from speech data. Various speech embeddings, such as VGGish, X-vectors, and Wav2Vec [<xref ref-type="bibr" rid="ref27">27</xref>], along with language embeddings like Bidirectional Encoder Representations from Transformers (BERT), Robustly Optimized BERT Pretraining Approach (RoBERTa), and GPT, have been used to capture rich acoustic and linguistic information for AD detection [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. Moreover, studies demonstrate that combining acoustic and linguistic features enhances AD detection performance by integrating multiple aspects of speech and language affected by cognitive decline [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>Despite the strong performance of deep learning-based approaches, they face significant challenges regarding explainability and interpretability. 
A systematic review conducted by Shi et al [<xref ref-type="bibr" rid="ref31">31</xref>] analyzed 72 studies and found that most only provided selective examples for their deep learning models, with very few explicitly addressing explainability. However, in clinical settings, there is a growing need for simpler yet interpretable methods for preliminary AD screening. Recent developments in large language models (LLMs) have demonstrated promise in analyzing speech transcripts from the Cookie Theft picture description task, with studies reporting accuracies of 80.3% using GPT-3 [<xref ref-type="bibr" rid="ref35">35</xref>] (OpenAI). Notably, Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] developed a method incorporating LLM-driven explainability, achieving 85.92% accuracy using GPT-4 (OpenAI).</p><p>In this research, we expand on the explainability capabilities of LLMs and systematically refine their prompt design. We guide the LLM to evaluate 4 key linguistic dimensions&#x2014;readability, fluency, richness of detail, and keyword relevance&#x2014;before making a diagnosis. This approach enhances both accuracy and interpretability. Our proposed framework also introduces a structured prompt template that directs LLMs in systematic linguistic analysis. Our design includes (1) expert role definition as a speech therapist, (2) specific task instructions for feature analysis, (3) standardized feature definitions, and (4) exemplar demonstrations.</p><p>Using Whisper (OpenAI) for speech-to-text conversion and GPT-4 with our specialized prompt template, our framework addresses a binary classification task&#x2014;distinguishing individuals with AD from cognitively normal controls (CN)&#x2014;and achieved around 91% precision and recall, with 96% specificity on the ADReSSo 2021 dataset, demonstrating that structured linguistic analysis alone can support reliable preliminary AD screening. 
Key contributions of this work include:</p><list list-type="order"><list-item><p>Proposing a novel framework integrating LLM-based linguistic analysis with structured feature evaluation and prompt engineering for generating explainable AD diagnosis.</p></list-item><list-item><p>Developing an AD classifier that incorporates both transcript content and LLM-explained features to enhance prediction accuracy.</p></list-item><list-item><p>Providing empirical evidence of stable and competitive diagnostic accuracy (<italic>F</italic><sub>1</sub>-score=91.05%; sensitivity=91.08%; specificity=96.29%) on the ADReSSo 2021 Challenge dataset using speech transcripts from the Cookie Theft picture description task [<xref ref-type="bibr" rid="ref36">36</xref>].</p></list-item><list-item><p>Demonstrating superior explainability through a structured multifeature framework, significantly outperforming an existing approach in diagnostic reasoning, evidence support, and clinical insight, winning 49 out of 54 pairwise evaluations via Gemini-3.1-flash-lite.</p></list-item></list></sec><sec id="s1-2"><title>Related Work</title><p>Before the advent of ChatGPT (OpenAI) in late 2022, most deep learning-based research for AD detection predominantly used BERT as the underlying machine learning framework. In these studies, linguistic features and language embeddings served as inputs to BERT. Subsequent studies adopted more advanced artificial intelligence (AI) models, such as GPT-3 and GPT-4, for AD detection. Below, we review these studies and highlight their strengths and weaknesses.</p></sec><sec id="s1-3"><title>AD Detection With BERT</title><p>BERT is a transformer-based pretrained language model introduced by Google researchers in 2018 [<xref ref-type="bibr" rid="ref37">37</xref>]. It represents text as a sequence of vectors, which can then be used to train a classification model. The application of BERT for direct transcript embedding has shown promising results in several studies. 
For instance, Padhee et al [<xref ref-type="bibr" rid="ref38">38</xref>] applied BERT to raw transcriptions, achieving an <italic>F</italic><sub>1</sub>-score of 80% for classifying patients with AD, patients with MCI, and CNs, while Rohanian et al [<xref ref-type="bibr" rid="ref39">39</xref>] highlighted BERT&#x2019;s adaptability to variations in transcription quality.</p><p>For feature-enriched embedding approaches, researchers have enhanced BERT&#x2019;s capabilities by incorporating additional linguistic features. Mahajan and Baths [<xref ref-type="bibr" rid="ref40">40</xref>] improved multimodal classification by integrating lexical diversity and syntactic complexity features with BERT embeddings. Qiao et al [<xref ref-type="bibr" rid="ref41">41</xref>] focused on model explainability by combining fluency and disfluency features, while Yuan et al [<xref ref-type="bibr" rid="ref42">42</xref>] examined semantic similarity and information density as complementary inputs to increase diagnostic accuracy.</p><p>Despite these successes, BERT-based methods face key challenges; direct embedding approaches lack direct clinical explainability, while feature-enriched methods introduce additional complexity and require domain expertise.</p></sec><sec id="s1-4"><title>AD Detection With Advanced LLM</title><p>The emergence of more advanced AI models, such as GPT-3 and GPT-4, has transformed AD detection through innovative approaches to linguistic feature extraction and analysis. Compared to earlier models like BERT, these LLMs excel at classification tasks, even with minimal or no additional training data, while also providing textual explanations.</p><p>Agbavor and Liang [<xref ref-type="bibr" rid="ref35">35</xref>] pioneered the use of GPT-3 for dementia prediction from spontaneous speech, leveraging text embeddings to capture semantic meaning and achieving 80.3% accuracy in distinguishing between patients with AD and CNs. 
Wang et al [<xref ref-type="bibr" rid="ref43">43</xref>] expanded this research by exploring GPT-4&#x2019;s capabilities for MCI screening, analyzing linguistic indicators via standardized prompts, and achieving 77.3% sensitivity and 83.3% specificity.</p><p>Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] introduced a novel methodology by using GPT-4 for speech fluency evaluation, integrating AI-generated opinions with original text, and achieving 85.92% accuracy and 94.44% specificity. Additionally, Balamurali and Chen [<xref ref-type="bibr" rid="ref44">44</xref>] conducted a comparative analysis of multiple LLMs in a zero-shot learning context, highlighting both the potential and the limitations of LLM technology in clinical settings at the time of this writing.</p><p>These developments illustrate the progression from basic text analysis to sophisticated diagnostic tools, while also emphasizing the need for standardized prompt engineering and clinical validation. This growing body of research suggests that well-structured LLM approaches could provide valuable support for preliminary AD screening while maintaining interpretability for health care professionals.</p><p>However, these LLM-based approaches face two major limitations:</p><list list-type="order"><list-item><p>Lack of transparency and interpretability: many LLM-driven approaches do not clearly specify the evaluation process. For instance, while Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] used GPT-4 to assess speech fluency, they did not define which fluency aspects were being measured. Similarly, Agbavor and Liang&#x2019;s [<xref ref-type="bibr" rid="ref35">35</xref>] approach using GPT-3 embeddings lacked clarity regarding the specific semantic features captured.</p></list-item><list-item><p>Absence of a structured analysis framework: previous studies varied significantly in their methodological approaches, lacking a standardized framework for LLM-based analysis. 
Although Balamurali and Chen [<xref ref-type="bibr" rid="ref44">44</xref>] specified multiple linguistic aspects for evaluation, their LLM-based approach remained exploratory, without establishing scoring criteria or standardization. This lack of methodological structure limits the clinical applicability of these models.</p></list-item></list><p>These limitations underscore the need for a systematic and interpretable approach to LLM-based AD detection, one that integrates structured feature evaluation with explainability. In this work, we address these challenges by developing a framework that enhances transparency, interpretability, and clinical relevance.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>Before introducing our methodology, we first describe the dataset used in this research. Our approach begins with extracting AD-relevant linguistic features using an LLM. We meticulously design prompts to guide the LLM in analyzing linguistic patterns associated with AD. The resulting feature explanations, along with their corresponding transcripts, are then used to construct an AD classifier. Detailed descriptions are provided in the following subsections.</p></sec><sec id="s2-2"><title>Dataset</title><p>The primary dataset used in this study is the ADReSSo 2021 Challenge corpus [<xref ref-type="bibr" rid="ref45">45</xref>], derived from DementiaBank [<xref ref-type="bibr" rid="ref46">46</xref>]. This dataset contains audio recordings of participants describing the Cookie Theft picture from the Boston Diagnostic Aphasia Examination (BDAE) [<xref ref-type="bibr" rid="ref47">47</xref>], a standardized task widely used in cognitive assessments. 
The dataset has been carefully balanced to address common demographic biases in medical datasets, ensuring matched distributions of age and gender between groups.</p><p>The dataset comprises 166 training samples, including 87 AD cases and 79 CNs, and 71 test samples, including 35 AD cases. All audio recordings underwent preprocessing to ensure consistency. The dataset&#x2019;s balanced structure mitigates the common issue of group imbalance in clinical datasets, making it particularly suitable for developing and evaluating AD classification models.</p></sec><sec id="s2-3"><title>The Framework</title><p>The overall framework of our AD prediction method is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The framework of the proposed method that incorporates both transcript and its feature explanations for Alzheimer disease (AD) prediction. AD: Alzheimer disease; CN: control normal; LLM: large language model; MLP: multilayer perceptron.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e86965_fig01.png"/></fig><p>The process consists of the following steps.</p><list list-type="order"><list-item><p>Speech-to-text conversion performed by Whisper.</p></list-item><list-item><p>Speaker attribution to extract the participant&#x2019;s description from the transcript using an LLM-based approach.</p></list-item></list><p>The first 2 preprocessing steps are described in the Preprocessing subsection. The examiner-free transcript is then analyzed by an LLM, which generates feature-based explanations across 4 linguistic categories. These features are detailed in the AD-Related Linguistic Feature Analysis With LLM section. Both the transcript and its corresponding explanations are converted into <italic>n</italic>-dimensional vector representations. 
These embeddings are then concatenated into a 2<italic>n</italic>-dimensional vector, integrating both semantic content and feature-based assessment. A multilayer perceptron (MLP) classifier processes the final representation to classify participants into AD or CN groups.</p></sec><sec id="s2-4"><title>Preprocessing</title><p>Our preprocessing pipeline consists of 2 essential steps to derive transcripts from the dataset.</p><list list-type="order"><list-item><p>Speech-to-text conversion: all audio recordings were transcribed using the Whisper automatic speech recognition (ASR) system [<xref ref-type="bibr" rid="ref48">48</xref>].</p></list-item><list-item><p>Speaker attribution: since the raw transcripts contain dialogues between examiners and participants, this step aims to isolate the participant&#x2019;s descriptions by removing examiner interventions (eg, &#x201C;What&#x2019;s happening in that picture?&#x201D;). We compared 2 methods: an LLM-based speaker attribution method and a conventional speaker diarization method using PyAnnote (pyannoteAI). Experimental results on the ADReSSo 2021 dataset show that the LLM-based speaker attribution method yields better performance (<italic>F</italic><sub>1</sub>-score=0.92) compared to the conventional speaker diarization method (<italic>F</italic><sub>1</sub>-score=0.82). We observe that the speaker diarization method using PyAnnote [<xref ref-type="bibr" rid="ref49">49</xref>] was prone to failure or speaker misidentification in low-volume recordings and cases where examiner and participant voices were acoustically similar, resulting in incomplete or contaminated transcripts. The LLM-based method, by contrast, leverages semantic content to reliably identify and remove examiner turns regardless of audio quality, preserving richer participant descriptions. Based on these results, the LLM-based approach was adopted for subsequent analyses. 
Readers are referred to <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the detailed prompt design.</p></list-item></list></sec><sec id="s2-5"><title>AD-Related Linguistic Feature Analysis With LLM</title><p>This study identifies 4 key linguistic features&#x2014;readability, fluency, richness of detail, and keyword relevance&#x2014;as critical for detecting AD from speech. These features were derived from a comprehensive review of linguistic studies and correspond to established categories in speech and language processing, including syntactic, semantic, lexical, disfluency, and pragmatic features. Each feature captures a distinct aspect of language impairment observed in AD, offering a structured framework for assessing cognitive decline. <xref ref-type="table" rid="table1">Table 1</xref> summarizes these features, their relevant linguistic categories, primary focus, quantification metrics, and key references.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Linguistic features for Alzheimer disease (AD) detection.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Feature</td><td align="left" valign="bottom">Relevant linguistic category</td><td align="left" valign="bottom">Core focus</td><td align="left" valign="bottom">Metrics</td><td align="left" valign="bottom">Key references</td></tr></thead><tbody><tr><td align="left" valign="top">Readability</td><td align="left" valign="top">Syntactic and lexical</td><td align="left" valign="top">Evaluating syntactic complexity, lexical diversity, and discourse coherence to reflect the organization of speech.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Lexical diversity (Type-Token Ratio; TTR)</p></list-item><list-item><p>Syntactic complexity (dependency parsing)</p></list-item><list-item><p>Discourse coherence (Coh-Metrix indices)</p></list-item></list></td><td align="left" 
valign="top">[<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref40">40</xref>-<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]</td></tr><tr><td align="left" valign="top">Fluency</td><td align="left" valign="top">Disfluencies</td><td align="left" valign="top">Measuring smoothness and flow of language, including hesitations, filled pauses, and repetitions.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Pause frequency and duration</p></list-item><list-item><p>Speech rate</p></list-item><list-item><p>Self-repair rates</p></list-item></list></td><td align="left" valign="top">[<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref52">52</xref>-<xref ref-type="bibr" rid="ref54">54</xref>]</td></tr><tr><td align="left" valign="top">Richness of detail</td><td align="left" valign="top">Semantic and pragmatic</td><td align="left" valign="top">Assessing the density and specificity of meaningful content in descriptions.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Proportion of information-bearing nouns and verbs</p></list-item><list-item><p>Content density (Latent Semantic Analysis)</p></list-item><list-item><p>Ratio of semantically empty words</p></list-item></list></td><td align="left" valign="top">[<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]</td></tr><tr><td align="left" valign="top">Keyword<break/>relevance</td><td align="left" valign="top">Semantic</td><td align="left" valign="top">Evaluating alignment of spoken content with predefined key elements in a given context (eg, the Cookie Theft image).</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Frequency of target keywords</p></list-item><list-item><p>Semantic similarity (cosine 
similarity and topic modeling)</p></list-item><list-item><p>Neural attention weights for key terms.</p></list-item></list></td><td align="left" valign="top">[<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]</td></tr></tbody></table></table-wrap><p>Readability encompasses syntactic complexity, lexical diversity, and discourse coherence, all of which are essential for evaluating the organization and comprehensibility of speech. Studies indicate that patients with AD often produce grammatically simplified sentences, shorter sentence segments, and less coherent discourse, reflecting a decline in their ability to construct complex and information-dense narratives [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref50">50</xref>]. Traditional readability assessments rely on dependency parsers and Coh-Metrix indices, but LLMs provide a holistic alternative by integrating syntactic and lexical patterns into a unified framework [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref51">51</xref>].</p><p>Fluency captures the smoothness and temporal flow of speech, focusing on pauses, hesitations, and repetitions. Patients with AD frequently exhibit disfluencies due to word retrieval challenges and sentence formulation difficulties, which signal cognitive decline [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. Common metrics include speech rate, pause frequency, and self-repair rates. LLMs enhance fluency analysis by automatically detecting patterns within transcripts, offering a scalable and nuanced approach to fluency evaluation [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref52">52</xref>].</p><p>Richness of detail refers to the density of meaningful and specific information within a description. 
Studies indicate that patients with AD tend to provide fewer information-bearing propositions, relying on vague or semantically empty words [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. This reflects impairments in semantic memory and access to stored knowledge. Metrics such as latent semantic analysis and content density analysis quantify these deficits. LLMs enable dynamic evaluations by assessing the use of descriptive language and narrative coherence, complementing traditional feature extraction methods [<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>Keyword relevance evaluates the degree to which spoken content aligns with predefined key elements of a context or scenario, such as the Cookie Theft picture used in our study. Patients with AD often omit critical objects or actions, reflecting impairments in lexical retrieval and semantic memory [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. Neural attention models have shown that diagnostically relevant keywords (eg, &#x201C;sink,&#x201D; &#x201C;water,&#x201D; and &#x201C;cookie&#x201D;) appear significantly less frequently in the descriptions of patients with AD [<xref ref-type="bibr" rid="ref57">57</xref>]. LLMs improve keyword analysis by leveraging attention mechanisms to quantify the inclusion and contextual relevance of key terms [<xref ref-type="bibr" rid="ref58">58</xref>].</p><p>These 4 linguistic features collectively address syntactic, lexical, semantic, and pragmatic aspects of AD-related language impairments. Traditional methods rely heavily on manual feature engineering and domain-specific tools, whereas LLMs provide an automated and holistic approach by integrating structured, semantic, and pragmatic analyses via carefully designed prompts. 
This LLM-driven approach provides a scalable and interpretable method for analyzing speech, aligning with recent advancements in biomedical informatics for dementia diagnosis [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref59">59</xref>].</p></sec><sec id="s2-6"><title>Prompt-Template Design</title><p>We developed a structured prompt template to facilitate consistent and comprehensive LLM-based analysis of Cookie Theft picture descriptions. The complete template is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The template is designed hierarchically, consisting of 4 key components (illustrated in <xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The hierarchical structure of the prompt template, illustrating the relationships between components and their respective subelements. AD: Alzheimer disease.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e86965_fig02.png"/></fig><p>The 4 key components are:</p><list list-type="order"><list-item><p>Role definition: establishes the expert context to ensure that the LLM provides clinically relevant responses [<xref ref-type="bibr" rid="ref60">60</xref>]. This component positions the LLM as a speech and language therapist with expertise in identifying language dysfunctions in individuals with cognitive impairment.</p></list-item><list-item><p>Task specification: provides analysis instructions and scoring guidelines to ensure structured evaluations. The analysis instructions explain that language dysfunctions in AD often arise from compromised semantic and pragmatic processing abilities and that the Cookie Theft picture description task is specifically designed to assess cognitive function and memory. 
The scoring guidelines direct the LLM to evaluate each feature on a standardized 1&#x2010;7 scale, where higher scores indicate better cognitive function. These guidelines emphasize the importance of providing detailed explanations and specific evidence from the transcript to support each feature assessment.</p></list-item><list-item><p>Feature definition: features to be evaluated include readability (syntactic complexity and comprehensibility), fluency (speech smoothness and coherence), richness of detail (information density and specificity), and keyword relevance (inclusion of essential elements from the Cookie Theft picture). Each feature definition includes brief descriptions of how these linguistic elements typically manifest in patients with AD compared to healthy individuals.</p></list-item><list-item><p>Exemplar demonstrations: the template includes 2 contrastive examples to calibrate the analysis, including a control example, generated from a participant with high cognitive function, and an AD example, produced by a participant with AD. The control example consists of a detailed, well-structured description demonstrating high scores across all features, whereas the AD example involves a description showing typical patterns of cognitive decline, characterized by fragmented expression and limited detail. Refer to <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the full prompt and an example of AD-related linguistic feature analysis generated by the LLM.</p></list-item></list></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study uses the publicly available ADReSSo 2021 dataset from the DementiaBank corpus, which does not involve direct patient contact or new clinical data collection. 
Institutional Review Board (IRB) approval (25-CT1-02[241015-2]) was obtained through an expedited review at Kaohsiung Veterans General Hospital, covering the methodological framework development and serving as a prerequisite for the associated clinical pilot study. No additional ethics approval was required for use of the publicly available dataset.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Configuration</title><p>We developed a multicomponent framework for AD detection, with each component specifically configured. We adopted GPT-4 [<xref ref-type="bibr" rid="ref61">61</xref>] as the large language model in our experiments. For the embedding process, we used OpenAI&#x2019;s text-embedding-ada-002 [<xref ref-type="bibr" rid="ref61">61</xref>] model to generate embeddings from both transcript content and feature explanations. Each component produced a 1536-dimensional vector, resulting in a concatenated 3072-dimensional vector for the subsequent classification task. For the classification model, we implemented a 2-layer MLP architecture. The first layer transformed the 3072-dimensional input into 512 hidden units with rectified linear unit activation and dropout (rate=0.1), followed by a second layer outputting binary classification probabilities. The model was trained using the Adam optimizer [<xref ref-type="bibr" rid="ref62">62</xref>], a batch size of 8, a learning rate of 0.01, and ran for 50 epochs. We used 5-fold cross-validation for model evaluation, with each fold maintaining balanced AD/Control ratios, using the 166 training samples of the ADReSSo 2021 dataset. 
Cross-entropy loss was used as the optimization criterion.</p></sec><sec id="s3-2"><title>Performance Results</title><p>The proposed framework was benchmarked against 3 comparative approaches that used LLMs for AD detection from speech transcripts using the ADReSSo 2021 dataset.</p><p>We compared against 2 published methods, including Agbavor and Liang [<xref ref-type="bibr" rid="ref35">35</xref>], who derived semantic representations using GPT-3 embeddings, and Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>], who combined GPT-4-based fluency assessments with transcript embeddings.</p><p>To assess output stability under the inherent stochasticity of LLM-based feature generation, we conducted 3 independent runs of the full pipeline using identical configurations and report the mean results, which are shown in <xref ref-type="table" rid="table2">Table 2</xref>. The mean <italic>F</italic><sub>1</sub>-score of 91.05% and mean specificity of 96.29% demonstrate consistent performance across runs. 
Importantly, the mean <italic>F</italic><sub>1</sub>-score exceeds that reported in Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] (85.80%) by 5.25 percentage points, confirming that the framework maintains its advantage over existing methods even under stochastic variation.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance comparison of different LLM<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>-based approaches for AD<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> detection.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Research</td><td align="left" valign="bottom">LLM</td><td align="left" valign="bottom">Approach</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score%</td><td align="left" valign="bottom">Accuracy%</td><td align="left" valign="bottom">Precision%</td><td align="left" valign="bottom">Sensitivity%</td><td align="left" valign="bottom">Specificity%</td></tr></thead><tbody><tr><td align="left" valign="top">Agbavor and Liang<break/>(2022) [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">GPT3</td><td align="left" valign="top">Semantic embeddings</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">80.3</td><td align="left" valign="top">80.6</td><td align="left" valign="top">80.6</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Bang et al<break/>(2024) [<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">GPT4</td><td align="left" valign="top">Fluency assessment<break/>+ embeddings</td><td align="left" valign="top">85.80</td><td align="left" valign="top">85.92</td><td align="left" valign="top">86.94</td><td align="left" valign="top">85.92</td><td align="left" valign="top">94.44</td></tr><tr><td align="left" valign="top">Our 
proposed framework</td><td align="left" valign="top">GPT4</td><td align="left" valign="top">Multifeature embeddings + MLP<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">91.05</td><td align="left" valign="top">91.08</td><td align="left" valign="top">91.52</td><td align="left" valign="top">91.08</td><td align="left" valign="top">96.29</td></tr><tr><td align="left" valign="top">Our proposed framework<break/>Llama 3 8B+nomic-embed (fully local)</td><td align="left" valign="top">Llama 3</td><td align="left" valign="top">Multifeature embeddings + MLP<break/>(fully local, no cloud API<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup>)</td><td align="left" valign="top">81.58</td><td align="left" valign="top">81.69</td><td align="left" valign="top">82.30</td><td align="left" valign="top">81.69</td><td align="left" valign="top">88.89</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>LLM: large language model.</p></fn><fn id="table2fn2"><p><sup>b</sup>AD: Alzheimer disease.</p></fn><fn id="table2fn3"><p><sup>c</sup>Not applicable.</p></fn><fn id="table2fn4"><p><sup>d</sup>MLP: multilayer perceptron.</p></fn><fn id="table2fn5"><p><sup>e</sup>API: application programming interface.</p></fn></table-wrap-foot></table-wrap><p>To evaluate feasibility for privacy-sensitive clinical environments where cloud-based application programming interfaces may not be permissible, we tested a fully local configuration using Llama 3 (8B) [<xref ref-type="bibr" rid="ref63">63</xref>] for feature extraction and nomic-embed-text [<xref ref-type="bibr" rid="ref64">64</xref>] for embedding generation, requiring no cloud application programming interface access. This configuration achieved an <italic>F</italic><sub>1</sub>-score of 81.58% and a specificity of 88.89%, as shown in the last row in <xref ref-type="table" rid="table2">Table 2</xref>. 
The performance gap relative to the proposed framework&#x2019;s mean <italic>F</italic><sub>1</sub>-score of 91.05% (9.47 percentage points) highlights the capability limitations of the fully local configuration, specifically the LLM and embedding model used, namely, Llama 3 (8B) and nomic-embed-text. Note that in the future, as more capable open-source LLMs and embedding models mature, the performance of fully local deployments is expected to improve substantially.</p><p>Compared to previous LLM-based methods, our framework demonstrates substantial improvements. The enhancement over Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>], which also uses GPT-4, is particularly notable, with improvements of approximately 5 percentage points in accuracy and <italic>F</italic><sub>1</sub>-score and 1.9 percentage points in specificity. This improvement can be attributed to our systematic integration of 4 distinct linguistic features (readability, fluency, richness of detail, and keyword relevance) and the combination of transcript and feature explanation embeddings.</p><p>The mean specificity (96.29%) achieved by our framework is particularly significant in clinical contexts, as it indicates strong capability to correctly identify non-AD cases, thereby reducing false positives in preliminary screening scenarios. This specificity exceeds Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] (94.44%), highlighting the clinical utility of our structured multifeature approach for reducing unnecessary follow-up.</p></sec><sec id="s3-3"><title>Ablation Study</title><p>To assess the contribution of individual components in our framework, we conducted comprehensive ablation experiments across 3 key aspects, namely the impact of individual features, the effect of embedding combinations, and the consequence of prompt design. 
All ablation experiments were conducted using a single fixed pipeline run to ensure that observed performance differences reflect the contribution of each component rather than run-to-run stochastic variation. The results are shown in <xref ref-type="table" rid="table3">Table 3</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Ablation study results: Impact of individual linguistic features on model performance.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Configuration</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Sensitivity</td><td align="left" valign="bottom">Specificity</td></tr></thead><tbody><tr><td align="left" valign="top">Full model</td><td align="left" valign="top">91.52</td><td align="left" valign="top">91.55</td><td align="left" valign="top">92.07</td><td align="left" valign="top">91.55</td><td align="left" valign="top">97.22</td></tr><tr><td align="left" valign="top">Without readability</td><td align="left" valign="top">85.90</td><td align="left" valign="top">85.92</td><td align="left" valign="top">86.02</td><td align="left" valign="top">85.92</td><td align="left" valign="top">88.89</td></tr><tr><td align="left" valign="top">Without fluency</td><td align="left" valign="top">88.73</td><td align="left" valign="top">88.73</td><td align="left" valign="top">88.73</td><td align="left" valign="top">88.73</td><td align="left" valign="top">88.89</td></tr><tr><td align="left" valign="top">Without richness of detail</td><td align="left" valign="top">84.50</td><td align="left" valign="top">84.51</td><td align="left" valign="top">84.53</td><td align="left" valign="top">84.51</td><td align="left" valign="top">86.11</td></tr><tr><td align="left" valign="top">Without keyword relevance</td><td align="left" 
valign="top">78.30</td><td align="left" valign="top">78.87</td><td align="left" valign="top">81.87</td><td align="left" valign="top">78.87</td><td align="left" valign="top">94.44</td></tr></tbody></table></table-wrap></sec><sec id="s3-4"><title>Individual Feature Impact</title><p>The full model (<italic>F</italic><sub>1</sub>-score=91.52%) in <xref ref-type="table" rid="table3">Table 3</xref> represents the result of the same fixed run used across all ablation conditions, providing a controlled baseline for comparing the effect of each component removal. We evaluated the importance of each linguistic feature by removing them one at a time, with the results shown in <xref ref-type="table" rid="table3">Table 3</xref>. Our findings show that all features contributed greatly to the model&#x2019;s performance, with keyword relevance being particularly crucial (dropping from 91.52% to 78.30% <italic>F</italic><sub>1</sub>-score when removed). The other features showed smaller yet still notable impacts: readability (<italic>F</italic><sub>1</sub>-score=85.90%), fluency (<italic>F</italic><sub>1</sub>-score=88.73%), and richness of detail (<italic>F</italic><sub>1</sub>-score=84.50%).</p></sec><sec id="s3-5"><title>Embedding Combination Effect</title><p>Our approach uses both transcript and feature embeddings. We intend to investigate the impact of each embedding type separately. Our findings, shown in <xref ref-type="table" rid="table4">Table 4</xref>, reveal that the use of both text and feature embeddings significantly outperformed those using either type of embeddings alone. The full model achieved a 91.52% <italic>F</italic><sub>1</sub>-score, while using only text embeddings or feature embeddings yielded 77.44% and 57.70%, respectively. 
This demonstrates the complementary nature of these representations.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Impact of different embedding configurations and prompt structures on model performance.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Configuration</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Sensitivity</td><td align="left" valign="bottom">Specificity</td></tr></thead><tbody><tr><td align="left" valign="top">Full model</td><td align="left" valign="top">91.52</td><td align="left" valign="top">91.55</td><td align="left" valign="top">92.07</td><td align="left" valign="top">91.55</td><td align="left" valign="top">97.22</td></tr><tr><td align="left" valign="top" colspan="6">Embedding configuration</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Text embedding only (1536 dim)</td><td align="left" valign="top">77.44</td><td align="left" valign="top">77.46</td><td align="left" valign="top">77.53</td><td align="left" valign="top">77.46</td><td align="left" valign="top">80.56</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Feature embedding only (1536 dim)</td><td align="left" valign="top">57.70</td><td align="left" valign="top">57.75</td><td align="left" valign="top">57.74</td><td align="left" valign="top">57.75</td><td align="left" valign="top">61.11</td></tr><tr><td align="left" valign="top" colspan="6">Prompt structure</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Without few-shot examples</td><td align="left" valign="top">85.86</td><td align="left" 
valign="top">85.92</td><td align="left" valign="top">86.36</td><td align="left" valign="top">85.92</td><td align="left" valign="top">91.67</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Without feature definitions</td><td align="left" valign="top">85.80</td><td align="left" valign="top">85.92</td><td align="left" valign="top">86.94</td><td align="left" valign="top">85.92</td><td align="left" valign="top">94.44</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Without AD<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> relation descriptions</td><td align="left" valign="top">85.90</td><td align="left" valign="top">85.92</td><td align="left" valign="top">86.02</td><td align="left" valign="top">85.92</td><td align="left" valign="top">88.89</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>AD: Alzheimer disease.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-6"><title>Prompt Design Impact</title><p>Our approach involves a structured prompt with several key elements. In this experiment, we analyzed performance changes after removing specific prompt components. The results, as detailed in <xref ref-type="table" rid="table4">Table 4</xref>, show that the full structured prompt template significantly enhanced performance (<italic>F</italic><sub>1</sub>-score 91.52%), while removing few-shot examples (85.86%), feature definitions (85.80%), or AD relation descriptions (85.90%) led to performance declines.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>To assess the explainability of our structured prompt design, we conducted a pairwise comparison with Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>], which also provides diagnostic explanations for each transcript. 
Pairwise comparison is a well-established evaluation method in which 2 alternatives are directly compared based on specific criteria. The comparison focused on 54 cases where both methods correctly classified the samples, ensuring a fair assessment that isolates the evaluation of explanation quality. The feature explanations used in this comparison were drawn from a single randomly selected pipeline run, ensuring that the evaluation reflects a representative output rather than a curated best-case result.</p><p>We used Gemini-3.1-flash-lite [<xref ref-type="bibr" rid="ref65">65</xref>] as an independent judge, instead of GPT-4 used in our method, to avoid self-preference, following recent natural language processing practices. This approach enables a cost-efficient, systematic, and consistent evaluation across multiple samples, making it particularly well-suited for comparing natural language explanations [<xref ref-type="bibr" rid="ref66">66</xref>]. We evaluated the explanations using 5 key criteria derived from explainable AI literature [<xref ref-type="bibr" rid="ref67">67</xref>-<xref ref-type="bibr" rid="ref71">71</xref>], namely diagnostic connection, which assesses how well linguistic features are linked to AD diagnosis [<xref ref-type="bibr" rid="ref67">67</xref>]; evidence support, which examines the concrete evidence provided from the transcript [<xref ref-type="bibr" rid="ref68">68</xref>]; clinical insight, which evaluates the value of insights for clinical assessment [<xref ref-type="bibr" rid="ref69">69</xref>]; feature coverage, which measures the comprehensiveness of linguistic feature analysis [<xref ref-type="bibr" rid="ref70">70</xref>]; and actionable information, which assesses the usefulness for health care professionals [<xref ref-type="bibr" rid="ref71">71</xref>].</p><p><xref ref-type="table" rid="table5">Table 5</xref> reports the numbers of wins for the method of Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>], wins for our 
method, and ties, as evaluated by Gemini-3.1-flash-lite. It shows that our method significantly outperformed the approach of Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] across all criteria, with our method preferred in 49 out of 54 cases. The most notable difference appeared in feature coverage, where our approach received 52 out of 54 preferences. These results highlight the strength of our structured prompt design, which systematically addresses multiple linguistic dimensions (readability, fluency, richness of detail, and keyword relevance), providing a more comprehensive and clinically relevant explanation compared to fluency-focused approaches.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Pairwise comparison results via Gemini-3.1-flash-lite.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Criteria</td><td align="left" valign="bottom">Wins for the method by Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>]<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td><td align="left" valign="bottom">Wins for our method<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td><td align="left" valign="bottom">Ties</td></tr></thead><tbody><tr><td align="left" valign="top">Diagnostic connection</td><td align="left" valign="top">3</td><td align="left" valign="top">31</td><td align="left" valign="top">20</td></tr><tr><td align="left" valign="top">Evidence support</td><td align="left" valign="top">16</td><td align="left" valign="top">38</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Clinical insight</td><td align="left" valign="top">9</td><td align="left" valign="top">45</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Feature coverage</td><td align="left" valign="top">2</td><td align="left" valign="top">52</td><td align="left" valign="top">0</td></tr><tr><td align="left" 
valign="top">Actionable information</td><td align="left" valign="top">5</td><td align="left" valign="top">49</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Overall winner</td><td align="left" valign="top">5</td><td align="left" valign="top">49</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Confidence level</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High</td><td align="left" valign="top">9.26</td><td align="left" valign="top">90.74</td><td align="left" valign="top">0.00</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Medium</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.00</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Low</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.00</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>Values represent counts out of 54 pairwise comparisons.</p></fn></table-wrap-foot></table-wrap><p><xref ref-type="fig" rid="figure3">Figure 3</xref> illustrates a representative case comparison between the 2 explanation methods. In this example, the method of Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] provides a general assessment of fluency, lacking structured analysis or explicit links to AD symptomatology. 
By contrast, our method offers a systematic evaluation across multiple linguistic features, with numerical scoring and specific observations linked to language dysfunction in AD.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Comparison of explanation methods. AD: Alzheimer disease.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e86965_fig03.png"/></fig></sec><sec id="s4-2"><title>Human Expert Evaluation</title><p>To address potential concerns regarding LLM-based evaluation bias, we conducted an independent human evaluation involving 2 neurologists from Taiwan medical centers. The experts were presented with randomized explanations from both methods for the same 54 cases in a blinded fashion, without knowledge of which method generated each explanation.</p><p>Evaluator A assigned preference to our method in 53 (98.1%) cases and to the method of Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] in 1 case, with no tied evaluations. Evaluator B assigned preference to our method in 36 (66.7%) cases and to the method of Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] in 9 cases, with 9 tied evaluations. Combined results showed 89 total preferences for our method (53+36) versus 10 for the method of Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] (1+9), with 9 ties. Such a difference between human annotators is not a surprise, as several research studies have demonstrated that human judgments are more diverse and variable [<xref ref-type="bibr" rid="ref72">72</xref>-<xref ref-type="bibr" rid="ref74">74</xref>]. 
Nevertheless, these results demonstrate strong interevaluator agreement favoring our structured multifeature approach, with Evaluator A showing near-unanimous preference and Evaluator B displaying greater variability while maintaining overall preference for our method.</p><p>The convergence between automated and human expert evaluations strengthens the validity of our explainability assessment and demonstrates that our structured multifeature approach provides substantially more interpretable and clinically relevant explanations for AD detection from speech.</p><p><xref ref-type="fig" rid="figure3">Figure 3</xref> illustrates a representative case comparison between the 2 explanation methods. In this example, the method of Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] provides a general assessment of fluency, lacking structured analysis or explicit links to AD symptomatology. By contrast, our method offers a systematic evaluation across multiple linguistic features, with numerical scoring and specific observations linked to language dysfunction in AD.</p><p>Analysis of representative cases reveals that our structured approach explicitly connects linguistic observations to cognitive impairment patterns typical of AD. For example, in case &#x201C;Adrsdt19,&#x201D; our method identified specific language disruptions such as hesitations, repetitions, and fragmented expressions, directly linking these to potential cognitive decline. 
In contrast, the approach of Bang et al [<xref ref-type="bibr" rid="ref36">36</xref>] often described language performance without establishing clear connections to AD-related impairments.</p><p>Note that LLM-based evaluation metrics should be interpreted with appropriate care; the convergence between Gemini-3.1-flash-lite and human expert assessments collectively strengthens the validity of our explainability findings.</p><p>To provide empirical support for feature selection, we conducted 2-tailed independent-samples <italic>t</italic> tests comparing LLM-derived feature scores between AD (n=122) and CN (n=115) groups (RStudio; Posit Software and R v4.5.0; R Foundation for Statistical Computing [<xref ref-type="bibr" rid="ref75">75</xref>]). The feature scores were obtained from a single randomly selected pipeline run, consistent with the approach used for the pairwise explainability evaluation. As shown in <xref ref-type="table" rid="table6">Table 6</xref>, all 4 features showed highly significant group differences (all <italic>P</italic>&#x003C;.001; df=235) with large effect sizes (Cohen <italic>d</italic> range: 1.11&#x2010;1.19). The largest mean difference was observed for richness of detail (AD: 3.42 vs CN: 5.04; <italic>&#x0394;</italic>=1.62), consistent with reduced informational specificity in AD speech. 
These findings independently validate the selection of the 4 features for the classification framework.</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p><italic>T</italic>-test results comparing linguistic feature scores between Alzheimer disease (AD) and control normal (CN) groups.</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Linguistic feature</td><td align="left" valign="bottom">AD<sup><xref ref-type="table-fn" rid="table6fn1">a</xref></sup> group, mean (SD)</td><td align="left" valign="bottom" colspan="2">AD, median (IQR)</td><td align="left" valign="bottom">CN<sup><xref ref-type="table-fn" rid="table6fn2">b</xref></sup> group, mean (SD)</td><td align="left" valign="bottom" colspan="2">CN, median (IQR)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Readability</td><td align="left" valign="top">4.00 (1.47)</td><td align="left" valign="top" colspan="2">4 (3-5)</td><td align="left" valign="top">5.49 (1.12)</td><td align="left" valign="top" colspan="2">6 (5-6)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table6fn3">c</xref></sup></td></tr><tr><td align="left" valign="top">Fluency</td><td align="left" valign="top">3.68 (1.37)</td><td align="left" valign="top" colspan="2">4 (2-5)</td><td align="left" valign="top">5.23 (1.24)</td><td align="left" valign="top" colspan="2">6 (5-6)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table6fn3">c</xref></sup></td></tr><tr><td align="left" valign="top">Richness of detail</td><td align="left" valign="top">3.42 (1.48)</td><td align="left" valign="top" colspan="2">3 (2-4)</td><td align="left" valign="top">5.04 (1.44)</td><td align="left" valign="top" colspan="2">5 (4-6)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table6fn3">c</xref></sup></td></tr><tr><td 
align="left" valign="top">Keyword relevance</td><td align="left" valign="top">3.84 (1.61)</td><td align="left" valign="top" colspan="2">4 (3-5)</td><td align="left" valign="top">5.54 (1.43)</td><td align="left" valign="top" colspan="2">6 (5-7)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table6fn3">c</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table6fn1"><p><sup>a</sup>AD: Alzheimer disease.</p></fn><fn id="table6fn2"><p><sup>b</sup>CN: control normal.</p></fn><fn id="table6fn3"><p><sup>c</sup>Degrees of freedom (df)=235.</p></fn></table-wrap-foot></table-wrap><p>We acknowledge that certain non-LLM state-of-the-art methods have reported higher diagnostic precision. For instance, Liu et al [<xref ref-type="bibr" rid="ref76">76</xref>] achieved an accuracy of 97.18% and an <italic>F</italic><sub>1</sub>-score of 97.09% on the same dataset by using a Mozilla Deep Speech ASR and BERT-based pipeline. While such deep-learning models excel in raw performance, they often function as &#x201C;black boxes&#x201D; with limited clinical interpretability. In contrast, our framework prioritizes explainable AI (XAI) by generating structured linguistic evidence across 4 dimensions. Although our framework achieves an accuracy of 91.08%, the primary value of this work lies in providing transparent, actionable insights that are essential for clinical trust and diagnostic reasoning.</p><p>These findings suggest that our structured multifeature prompt design provides substantially more interpretable explanations for AD detection from speech. Enhanced explainability can improve clinical trust in AI-assisted diagnosis, potentially facilitating adoption in health care settings where transparency is essential. 
Furthermore, the detailed linguistic breakdowns generated by our method could support more targeted intervention strategies, enabling clinicians to focus on specific linguistic deficits observed in patients.</p></sec><sec id="s4-3"><title>Conclusion</title><p>Our structured LLM-based framework, which leverages 4 key linguistic features (readability, fluency, richness of detail, and keyword relevance), achieved 92% precision and 97% specificity in detecting AD from speech transcripts. While further clinical validation is needed, this work demonstrates that well-structured linguistic analysis using LLMs can provide a reliable and explainable method for preliminary AD screening. Our framework offers an accessible tool for early detection of cognitive decline, potentially reducing barriers to timely diagnosis. The high performance and interpretable outputs of our model suggest promising directions for integrating AI-assisted cognitive assessment into clinical practice. Particularly in early screening stages, where accessible, noninvasive assessment tools are most valuable, our method could serve as an effective complement to traditional diagnostic approaches.</p><p>Despite presenting key innovations in AD detection through structured linguistic analysis, our study has several limitations. The ADReSSo 2021 dataset, while balanced, may not fully capture the linguistic variability found across the broader population of patients with AD. Additionally, our method is currently designed for English-language transcripts, requiring multilingual adaptation for broader clinical applicability. Although the ADReSSo 2021 dataset is demographically balanced with age and gender matched between groups, broader clinical deployment may require demographic-adjusted scoring baselines to account for variables such as education level, native language, and cultural background; this remains a direction for future work. 
Further studies are needed to assess how AI-based assessments align with real-world medical diagnoses and clinical outcomes. Furthermore, our reliance on transcript-based analysis introduces potential ASR errors, suggesting future work should incorporate direct audio processing techniques.</p><p>Despite these challenges, our findings demonstrate that structured LLM-based linguistic assessment provides a scalable, interpretable tool for early AD detection. This approach effectively bridges the gap between AI-based text analysis and real-world clinical applications, offering a step forward in leveraging AI for cognitive health monitoring.</p></sec></sec></body><back><ack><p>During the preparation of this work, the authors used Claude AI and ChatGPT in order to improve the language and flow of the manuscript. After using these tools, the authors reviewed and edited the content as needed and take full responsibility for the content of the publication.</p></ack><notes><sec><title>Funding</title><p>This work was supported by research grants from Kaohsiung Municipal United Hospital, Taiwan (KMUH11305) and the National Science and Technology Council, Taiwan (NSTC 112-2410-H-110-016-MY3).</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AD</term><def><p>Alzheimer disease</p></def></def-item><def-item><term id="abb2">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb3">ANU-ADRI</term><def><p>Australian National University Alzheimer Disease Risk Index</p></def></def-item><def-item><term id="abb4">ASR</term><def><p>automatic speech recognition</p></def></def-item><def-item><term id="abb5">BDAE</term><def><p>Boston Diagnostic Aphasia Examination</p></def></def-item><def-item><term id="abb6">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term 
id="abb7">CAIDE</term><def><p>Cardiovascular Risk Factors, Aging, and Dementia</p></def></def-item><def-item><term id="abb8">CAT</term><def><p>computerized adaptive testing</p></def></def-item><def-item><term id="abb9">CN</term><def><p>control normal</p></def></def-item><def-item><term id="abb10">IRB</term><def><p>Institutional Review Board</p></def></def-item><def-item><term id="abb11">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb12">MCI</term><def><p>mild cognitive impairment</p></def></def-item><def-item><term id="abb13">MLP</term><def><p>multilayer perceptron</p></def></def-item><def-item><term id="abb14">RoBERTa</term><def><p>Robustly Optimized BERT Pretraining Approach</p></def></def-item><def-item><term id="abb15">WHO</term><def><p>World Health Organization</p></def></def-item><def-item><term id="abb16">XAI</term><def><p>explainable artificial intelligence</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Long</surname><given-names>S</given-names> </name><name name-style="western"><surname>Benoist</surname><given-names>C</given-names> </name><name name-style="western"><surname>Weidner</surname><given-names>W</given-names> </name></person-group><article-title>World Alzheimer report 2023</article-title><year>2023</year><access-date>2026-05-06</access-date><publisher-name>Alzheimer&#x2019;s Disease International</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.alzint.org/u/World-Alzheimer-Report-2023.pdf">https://www.alzint.org/u/World-Alzheimer-Report-2023.pdf</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><article-title>2022 Alzheimer&#x2019;s disease facts and figures</article-title><source>Alzheimers 
Dement</source><year>2022</year><month>04</month><volume>18</volume><issue>4</issue><fpage>700</fpage><lpage>789</lpage><pub-id pub-id-type="doi">10.1002/alz.12638</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><article-title>2024 Alzheimer&#x2019;s disease facts and figures</article-title><source>Alzheimers Dement</source><year>2024</year><month>05</month><volume>20</volume><issue>5</issue><fpage>3708</fpage><lpage>3821</lpage><pub-id pub-id-type="doi">10.1002/alz.13809</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hurd</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Martorell</surname><given-names>P</given-names> </name><name name-style="western"><surname>Delavande</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mullen</surname><given-names>KJ</given-names> </name><name name-style="western"><surname>Langa</surname><given-names>KM</given-names> </name></person-group><article-title>Monetary costs of dementia in the United States</article-title><source>N Engl J Med</source><year>2013</year><month>04</month><day>4</day><volume>368</volume><issue>14</issue><fpage>1326</fpage><lpage>1334</lpage><pub-id pub-id-type="doi">10.1056/NEJMsa1204629</pub-id><pub-id pub-id-type="medline">23550670</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Banerjee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rahman&#x2010;Amin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Eul&#x2010;Barker</surname><given-names>N</given-names> </name></person-group><article-title>New evidence on the value of timely diagnosis for people with 
dementia</article-title><source>Int J Geriat Psychiatry</source><year>2022</year><month>06</month><volume>37</volume><issue>6</issue><pub-id pub-id-type="doi">10.1002/gps.5733</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Couch</surname><given-names>E</given-names> </name><name name-style="western"><surname>Co</surname><given-names>M</given-names> </name><name name-style="western"><surname>Albertyn</surname><given-names>CP</given-names> </name><name name-style="western"><surname>Prina</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lawrence</surname><given-names>V</given-names> </name></person-group><article-title>A qualitative study of informal caregiver perceptions of the benefits of an early dementia diagnosis</article-title><source>BMC Health Serv Res</source><year>2024</year><month>04</month><day>24</day><volume>24</volume><issue>1</issue><fpage>508</fpage><pub-id pub-id-type="doi">10.1186/s12913-024-10957-6</pub-id><pub-id pub-id-type="medline">38658907</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Incorvaia</surname><given-names>AD</given-names> </name></person-group><article-title>Early detection of Alzheimer&#x2019;s disease benefits research, quality of life, and end-of-life planning</article-title><source>AJOB Neurosci</source><year>2021</year><volume>12</volume><issue>4</issue><fpage>243</fpage><lpage>244</lpage><pub-id pub-id-type="doi">10.1080/21507740.2021.1941404</pub-id><pub-id pub-id-type="medline">34704912</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barkhof</surname><given-names>F</given-names> </name><name 
name-style="western"><surname>Pressman</surname><given-names>PS</given-names> </name></person-group><article-title>Early detection of incipient Alzheimer pathology</article-title><source>Neurology</source><year>2021</year><month>04</month><day>6</day><volume>96</volume><issue>14</issue><fpage>647</fpage><lpage>648</lpage><pub-id pub-id-type="doi">10.1212/WNL.0000000000011739</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nasreddine</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Garibotto</surname><given-names>V</given-names> </name><name name-style="western"><surname>Kyaga</surname><given-names>S</given-names> </name><name name-style="western"><surname>Padovani</surname><given-names>A</given-names> </name></person-group><article-title>The early diagnosis of Alzheimer&#x2019;s disease: a patient-centred conversation with the care team</article-title><source>Neurol Ther</source><year>2023</year><month>02</month><volume>12</volume><issue>1</issue><fpage>11</fpage><lpage>23</lpage><pub-id pub-id-type="doi">10.1007/s40120-022-00428-7</pub-id><pub-id pub-id-type="medline">36528836</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Altomare</surname><given-names>D</given-names> </name><name name-style="western"><surname>Stampacchia</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ribaldi</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Plasma biomarkers for Alzheimer&#x2019;s disease: a field-test in a memory clinic</article-title><source>J Neurol Neurosurg Psychiatry</source><year>2023</year><month>06</month><volume>94</volume><issue>6</issue><fpage>420</fpage><lpage>427</lpage><pub-id 
pub-id-type="doi">10.1136/jnnp-2022-330619</pub-id><pub-id pub-id-type="medline">37012066</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blennow</surname><given-names>K</given-names> </name><name name-style="western"><surname>Galasko</surname><given-names>D</given-names> </name><name name-style="western"><surname>Perneczky</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The potential clinical value of plasma biomarkers in Alzheimer&#x2019;s disease</article-title><source>Alzheimers Dement</source><year>2023</year><month>12</month><volume>19</volume><issue>12</issue><fpage>5805</fpage><lpage>5816</lpage><pub-id pub-id-type="doi">10.1002/alz.13455</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anstey</surname><given-names>KJ</given-names> </name><name name-style="western"><surname>Zheng</surname><given-names>L</given-names> </name><name name-style="western"><surname>Peters</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Dementia risk scores and their role in the implementation of risk reduction guidelines</article-title><source>Front Neurol</source><year>2021</year><volume>12</volume><fpage>765454</fpage><pub-id pub-id-type="doi">10.3389/fneur.2021.765454</pub-id><pub-id pub-id-type="medline">35058873</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Galvin</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Aisen</surname><given-names>P</given-names> </name><name name-style="western"><surname>Langbaum</surname><given-names>JB</given-names> 
</name><etal/></person-group><article-title>Early stages of Alzheimer&#x2019;s disease: evolving the care team for optimal patient management</article-title><source>Front Neurol</source><year>2020</year><volume>11</volume><fpage>592302</fpage><pub-id pub-id-type="doi">10.3389/fneur.2020.592302</pub-id><pub-id pub-id-type="medline">33551954</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bernstein Sideman</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chalmer</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ayers</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Lessons from detecting cognitive impairment including dementia (DetectCID) in primary care</article-title><source>J Alzheimers Dis</source><year>2022</year><volume>86</volume><issue>2</issue><fpage>655</fpage><lpage>665</lpage><pub-id pub-id-type="doi">10.3233/JAD-215106</pub-id><pub-id pub-id-type="medline">35124639</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reiss</surname><given-names>AB</given-names> </name><name name-style="western"><surname>de Levante Raphael</surname><given-names>D</given-names> </name><name name-style="western"><surname>Chin</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Sinha</surname><given-names>V</given-names> </name></person-group><article-title>The physician&#x2019;s Alzheimer&#x2019;s disease management guide: early detection and diagnosis of cognitive impairment, Alzheimer&#x2019;s disease and related dementia</article-title><source>AIMS Public Health</source><year>2022</year><volume>9</volume><issue>4</issue><fpage>661</fpage><lpage>689</lpage><pub-id 
pub-id-type="doi">10.3934/publichealth.2022047</pub-id><pub-id pub-id-type="medline">36636148</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McGlohen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>HH</given-names> </name></person-group><article-title>Combining computer adaptive testing technology with cognitively diagnostic assessment</article-title><source>Behav Res Methods</source><year>2008</year><month>08</month><volume>40</volume><issue>3</issue><fpage>808</fpage><lpage>821</lpage><pub-id pub-id-type="doi">10.3758/brm.40.3.808</pub-id><pub-id pub-id-type="medline">18697677</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Patten</surname><given-names>R</given-names> </name></person-group><article-title>Introduction to the special issue - neuropsychology from a distance: psychometric properties and clinical utility of remote neurocognitive tests</article-title><source>J Clin Exp Neuropsychol</source><year>2021</year><month>10</month><volume>43</volume><issue>8</issue><fpage>767</fpage><lpage>773</lpage><pub-id pub-id-type="doi">10.1080/13803395.2021.2021645</pub-id><pub-id pub-id-type="medline">35133240</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x00D6;hman</surname><given-names>F</given-names> </name><name name-style="western"><surname>Hassenstab</surname><given-names>J</given-names> </name><name name-style="western"><surname>Berron</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sch&#x00F6;ll</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Papp</surname><given-names>KV</given-names> </name></person-group><article-title>Current advances in digital cognitive assessment for preclinical Alzheimer&#x2019;s disease</article-title><source>Alzheimers Dement (Amst)</source><year>2021</year><month>01</month><access-date>2026-05-06</access-date><volume>13</volume><issue>1</issue><fpage>e12217</fpage><comment><ext-link ext-link-type="uri" xlink:href="https://alz-journals.onlinelibrary.wiley.com/toc/23528729/13/1">https://alz-journals.onlinelibrary.wiley.com/toc/23528729/13/1</ext-link></comment><pub-id pub-id-type="doi">10.1002/dad2.12217</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roque</surname><given-names>N</given-names> </name><name name-style="western"><surname>Sliwinski</surname><given-names>M</given-names> </name><name name-style="western"><surname>Katz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Anderson-Hanley</surname><given-names>C</given-names> </name></person-group><article-title>Revolutionizing cognitive assessment: the power of digital biomarkers and ecologically valid methods</article-title><source>Innov Aging</source><year>2023</year><month>12</month><day>21</day><volume>7</volume><issue>Supplement_1</issue><fpage>380</fpage><lpage>380</lpage><pub-id pub-id-type="doi">10.1093/geroni/igad104.1260</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moore</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Swendsen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Depp</surname><given-names>CA</given-names> </name></person-group><article-title>Applications for self-administered mobile cognitive assessments in clinical 
research: a systematic review</article-title><source>Int J Methods Psychiatr Res</source><year>2017</year><month>12</month><volume>26</volume><issue>4</issue><fpage>e1562</fpage><pub-id pub-id-type="doi">10.1002/mpr.1562</pub-id><pub-id pub-id-type="medline">28370881</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bj&#x00F6;rngrim</surname><given-names>S</given-names> </name><name name-style="western"><surname>van den Hurk</surname><given-names>W</given-names> </name><name name-style="western"><surname>Betancort</surname><given-names>M</given-names> </name><name name-style="western"><surname>Machado</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lindau</surname><given-names>M</given-names> </name></person-group><article-title>Comparing traditional and digitized cognitive tests used in standard clinical evaluation - a study of the digital application Minnemera</article-title><source>Front Psychol</source><year>2019</year><volume>10</volume><fpage>2327</fpage><pub-id pub-id-type="doi">10.3389/fpsyg.2019.02327</pub-id><pub-id pub-id-type="medline">31681117</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Staffaroni</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Tsoy</surname><given-names>E</given-names> </name><name name-style="western"><surname>Taylor</surname><given-names>J</given-names> </name><name name-style="western"><surname>Boxer</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Possin</surname><given-names>KL</given-names> </name></person-group><article-title>Digital cognitive assessments for dementia: digital assessments may enhance the efficiency of evaluations in neurology and other 
clinics</article-title><source>Pract Neurol (Fort Wash Pa)</source><year>2020</year><access-date>2026-05-23</access-date><volume>2020</volume><fpage>24</fpage><lpage>45</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://pubmed.ncbi.nlm.nih.gov/33927583/">https://pubmed.ncbi.nlm.nih.gov/33927583/</ext-link></comment><pub-id pub-id-type="medline">33927583</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Laske</surname><given-names>C</given-names> </name><name name-style="western"><surname>Sohrabi</surname><given-names>HR</given-names> </name><name name-style="western"><surname>Frost</surname><given-names>SM</given-names> </name><etal/></person-group><article-title>Innovative diagnostic tools for early detection of Alzheimer&#x2019;s disease</article-title><source>Alzheimers Dement</source><year>2015</year><month>05</month><volume>11</volume><issue>5</issue><fpage>561</fpage><lpage>578</lpage><pub-id pub-id-type="doi">10.1016/j.jalz.2014.06.004</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chou</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>CT</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>YN</given-names> </name><etal/></person-group><article-title>Screening for early Alzheimer&#x2019;s disease: enhancing diagnosis with linguistic features and biomarkers</article-title><source>Front Aging Neurosci</source><year>2024</year><volume>16</volume><fpage>1451326</fpage><pub-id pub-id-type="doi">10.3389/fnagi.2024.1451326</pub-id><pub-id pub-id-type="medline">39376506</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Mao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rasmussen</surname><given-names>L</given-names> </name><etal/></person-group><article-title>AD-BERT: using pre-trained language model to predict the progression from mild cognitive impairment to Alzheimer&#x2019;s disease</article-title><source>J Biomed Inform</source><year>2023</year><month>08</month><volume>144</volume><fpage>104442</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2023.104442</pub-id><pub-id pub-id-type="medline">37429512</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gagliardi</surname><given-names>G</given-names> </name></person-group><article-title>Natural language processing techniques for studying language in pathological ageing: a scoping review</article-title><source>Int J Lang Commun Disord</source><year>2024</year><volume>59</volume><issue>1</issue><fpage>110</fpage><lpage>122</lpage><pub-id pub-id-type="doi">10.1111/1460-6984.12870</pub-id><pub-id pub-id-type="medline">36960885</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Balagopalan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Novikova</surname><given-names>J</given-names> </name></person-group><article-title>Comparing acoustic-based approaches for Alzheimer&#x2019;s disease detection</article-title><source>Interspeech 2021</source><access-date>2026-05-06</access-date><comment>Preprint posted online on 2021</comment><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.isca-archive.org/interspeech_2021">https://www.isca-archive.org/interspeech_2021</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2021-759</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Ortiz-Perez</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ruiz-Ponce</surname><given-names>P</given-names> </name><name name-style="western"><surname>Tom&#x00E1;s</surname><given-names>D</given-names> </name><name name-style="western"><surname>Garcia-Rodriguez</surname><given-names>J</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Garc&#x00ED;a Bringas</surname><given-names>P</given-names> </name></person-group><article-title>Deep learning-based dementia prediction using multimodal data</article-title><source>17th International Workshop on Soft Computing Models in Industrial and Environmental Applications (SOCO 2022)</source><year>2023</year><volume>531</volume><publisher-name>Springer, Cham</publisher-name><fpage>260</fpage><lpage>269</lpage><series>Lecture Notes in Networks and Systems</series><pub-id pub-id-type="doi">10.1007/978-3-031-18050-7_25</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Soroski</surname><given-names>T</given-names> </name><name name-style="western"><surname>da Cunha Vasco</surname><given-names>T</given-names> </name><name name-style="western"><surname>Newton-Mason</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Evaluating web-based automatic transcription for Alzheimer speech data: transcript comparison and machine learning analysis</article-title><source>JMIR 
Aging</source><year>2022</year><month>09</month><day>21</day><volume>5</volume><issue>3</issue><fpage>e33460</fpage><pub-id pub-id-type="doi">10.2196/33460</pub-id><pub-id pub-id-type="medline">36129754</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ye</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Exploring linguistic feature and model combination for speech recognition based automatic AD detection</article-title><year>2022</year><access-date>2026-05-06</access-date><conf-name>Interspeech 2022</conf-name><conf-date>Sep 18-22, 2022</conf-date><conf-loc>Incheon, South Korea</conf-loc><fpage>3328</fpage><lpage>3332</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2022">https://www.isca-archive.org/interspeech_2022</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2022-723</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cheung</surname><given-names>G</given-names> </name><name name-style="western"><surname>Shahamiri</surname><given-names>SR</given-names> </name></person-group><article-title>Speech and language processing with deep learning for dementia diagnosis: a systematic review</article-title><source>Psychiatry Res</source><year>2023</year><month>11</month><volume>329</volume><fpage>115538</fpage><pub-id pub-id-type="doi">10.1016/j.psychres.2023.115538</pub-id><pub-id pub-id-type="medline">37864994</pub-id></nlm-citation></ref><ref 
id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Graham</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>EE</given-names> </name><name name-style="western"><surname>Jeste</surname><given-names>DV</given-names> </name><etal/></person-group><article-title>Artificial intelligence approaches to predicting and detecting cognitive decline in older adults: a conceptual review</article-title><source>Psychiatry Res</source><year>2020</year><month>02</month><volume>284</volume><fpage>112732</fpage><pub-id pub-id-type="doi">10.1016/j.psychres.2019.112732</pub-id><pub-id pub-id-type="medline">31978628</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Palliya Guruge</surname><given-names>C</given-names> </name><name name-style="western"><surname>Oviatt</surname><given-names>S</given-names> </name><name name-style="western"><surname>Delir Haghighi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Pritchard</surname><given-names>E</given-names> </name></person-group><article-title>Advances in multimodal behavioral analytics for early dementia diagnosis: a review</article-title><year>2021</year><month>10</month><day>18</day><access-date>2026-05-06</access-date><conf-name>ICMI &#x2019;21</conf-name><conf-date>Oct 18, 2021</conf-date><conf-loc>Montr&#x00E9;al QC Canada</conf-loc><fpage>328</fpage><lpage>340</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/proceedings/10.1145/3462244">https://dl.acm.org/doi/proceedings/10.1145/3462244</ext-link></comment><pub-id pub-id-type="doi">10.1145/3462244.3479933</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name 
name-style="western"><surname>Pandey</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Shekhawat</surname><given-names>HS</given-names> </name><name name-style="western"><surname>Bhasin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jasuja</surname><given-names>R</given-names> </name><name name-style="western"><surname>Prasanna</surname><given-names>S</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Kim</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tiwary</surname><given-names>US</given-names> </name><name name-style="western"><surname>Sur</surname><given-names>M</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>D</given-names> </name></person-group><article-title>Alzheimer&#x2019;s dementia recognition using multimodal fusion of speech and text embeddings</article-title><source>Intelligent Human Computer Interaction (IHCI)</source><year>2021</year><volume>13184</volume><publisher-name>Springer, Cham</publisher-name><fpage>718</fpage><lpage>728</lpage><series>Lecture Notes in Computer Science</series><pub-id pub-id-type="doi">10.1007/978-3-030-98404-5_64</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Agbavor</surname><given-names>F</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>H</given-names> </name></person-group><article-title>Predicting dementia from spontaneous speech using large language models</article-title><source>PLOS Digit 
Health</source><year>2022</year><month>12</month><volume>1</volume><issue>12</issue><fpage>e0000168</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000168</pub-id><pub-id pub-id-type="medline">36812634</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Han</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kang</surname><given-names>B</given-names> </name></person-group><article-title>Alzheimer&#x2019;s disease recognition from spontaneous speech using large language models</article-title><source>ETRI Journal</source><year>2024</year><month>02</month><volume>46</volume><issue>1</issue><fpage>96</fpage><lpage>105</lpage><pub-id pub-id-type="doi">10.4218/etrij.2023-0356</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names></name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><year>2019</year><access-date>2026-05-23</access-date><conf-name>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</conf-name><conf-date>Jun 2-7, 2019</conf-date><conf-loc>Minneapolis, Minnesota</conf-loc><publisher-name>Association for Computational 
Linguistics</publisher-name><fpage>4171</fpage><lpage>4186</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/N19-1423/">https://aclanthology.org/N19-1423/</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Padhee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Illendula</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sadler</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Predicting early indicators of cognitive decline from verbal utterances</article-title><access-date>2026-05-06</access-date><conf-name>2020 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</conf-name><conf-date>2020</conf-date><conf-loc>Seoul, Korea (South)</conf-loc><fpage>477</fpage><lpage>480</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=9312958">https://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=9312958</ext-link></comment><pub-id pub-id-type="doi">10.1109/BIBM49941.2020.9313106</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Rohanian</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hough</surname><given-names>J</given-names> </name><name name-style="western"><surname>Purver</surname><given-names>M</given-names> </name></person-group><article-title>Alzheimer&#x2019;s dementia recognition using acoustic, lexical, disfluency and speech pause features robust to noisy inputs</article-title><access-date>2026-05-06</access-date><conf-name>Interspeech 2021</conf-name><conf-date>2021</conf-date><comment><ext-link 
ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2021">https://www.isca-archive.org/interspeech_2021</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2021-1633</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mahajan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Baths</surname><given-names>V</given-names> </name></person-group><article-title>Acoustic and language based deep learning approaches for Alzheimer&#x2019;s dementia detection from spontaneous speech</article-title><source>Front Aging Neurosci</source><year>2021</year><volume>13</volume><fpage>623607</fpage><pub-id pub-id-type="doi">10.3389/fnagi.2021.623607</pub-id><pub-id pub-id-type="medline">33613269</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Qiao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yin</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wiechmann</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kerz</surname><given-names>E</given-names> </name></person-group><article-title>Alzheimer&#x2019;s disease detection from spontaneous speech through combining linguistic complexity and (dis)fluency features with pretrained language models</article-title><access-date>2026-05-06</access-date><conf-name>Interspeech 2021</conf-name><conf-date>2021</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2021">https://www.isca-archive.org/interspeech_2021</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2021-1415</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation 
citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Yuan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bian</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Cai</surname><given-names>X</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ye</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Church</surname><given-names>K</given-names> </name></person-group><article-title>Disfluencies and fine-tuning pre-trained language models for detection of Alzheimer&#x2019;s disease</article-title><access-date>2026-05-06</access-date><conf-name>Interspeech 2020</conf-name><conf-date>2020</conf-date><fpage>2162</fpage><lpage>2166</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2020">https://www.isca-archive.org/interspeech_2020</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2020-2516</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Li</surname><given-names>A</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name></person-group><article-title>Text dialogue analysis for primary screening of mild cognitive impairment: development and validation study</article-title><source>J Med Internet Res</source><year>2023</year><month>12</month><day>29</day><volume>25</volume><fpage>e51501</fpage><pub-id pub-id-type="doi">10.2196/51501</pub-id><pub-id pub-id-type="medline">38157230</pub-id></nlm-citation></ref><ref 
id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Balamurali</surname><given-names>B</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>JM</given-names> </name></person-group><article-title>Performance assessment of ChatGPT versus Bard in detecting Alzheimer&#x2019;s dementia</article-title><source>Diagnostics (Basel)</source><year>2024</year><month>04</month><day>15</day><volume>14</volume><issue>8</issue><fpage>817</fpage><pub-id pub-id-type="doi">10.3390/diagnostics14080817</pub-id><pub-id pub-id-type="medline">38667463</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Luz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Haider</surname><given-names>F</given-names> </name><name name-style="western"><surname>de la Fuente</surname><given-names>S</given-names> </name><name name-style="western"><surname>Fromm</surname><given-names>D</given-names> </name><name name-style="western"><surname>MacWhinney</surname><given-names>B</given-names> </name></person-group><article-title>Detecting cognitive decline using speech only: the ADReSSo Challenge</article-title><access-date>2026-05-06</access-date><conf-name>Interspeech 2021</conf-name><conf-date>2021</conf-date><fpage>3780</fpage><lpage>3784</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2021">https://www.isca-archive.org/interspeech_2021</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2021-1220</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Becker</surname><given-names>JT</given-names> </name><name 
name-style="western"><surname>Boller</surname><given-names>F</given-names> </name><name name-style="western"><surname>Lopez</surname><given-names>OL</given-names> </name><name name-style="western"><surname>Saxton</surname><given-names>J</given-names> </name><name name-style="western"><surname>McGonigle</surname><given-names>KL</given-names> </name></person-group><article-title>The natural history of Alzheimer&#x2019;s disease. Description of study cohort and accuracy of diagnosis</article-title><source>Arch Neurol</source><year>1994</year><month>06</month><volume>51</volume><issue>6</issue><fpage>585</fpage><lpage>594</lpage><pub-id pub-id-type="doi">10.1001/archneur.1994.00540180063015</pub-id><pub-id pub-id-type="medline">8198470</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Goodglass</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kaplan</surname><given-names>E</given-names> </name><name name-style="western"><surname>Barresi</surname><given-names>B</given-names> </name></person-group><source>BDAE: The Boston Diagnostic Aphasia Examination</source><year>2001</year><publisher-name>Lippincott Williams &#x0026; Wilkins</publisher-name><fpage>1</fpage><lpage>85</lpage><pub-id pub-id-type="other">9780683305593</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Radford</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Brockman</surname><given-names>G</given-names> </name><name name-style="western"><surname>McLeavey</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>Sutskever</surname><given-names>I</given-names> </name></person-group><article-title>Robust speech recognition via large-scale weak supervision</article-title><source>Proceedings of the International Conference on Machine Learning (ICML)</source><access-date>2026-05-23</access-date><volume>202</volume><publisher-name>Proceedings of Machine Learning Research</publisher-name><fpage>28492</fpage><lpage>28518</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://proceedings.mlr.press/v202/radford23a.html">https://proceedings.mlr.press/v202/radford23a.html</ext-link></comment></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Bredin</surname><given-names>H</given-names> </name><name name-style="western"><surname>Laurent</surname><given-names>A</given-names> </name></person-group><article-title>End-to-end speaker segmentation for overlap-aware resegmentation</article-title><access-date>2026-05-06</access-date><conf-name>Interspeech 2021</conf-name><conf-date>2021</conf-date><fpage>3707</fpage><lpage>3711</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2021">https://www.isca-archive.org/interspeech_2021</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2021-560</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>N</given-names> </name><name name-style="western"><surname>Yuan</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>Q</given-names> </name></person-group><article-title>Improving Alzheimer&#x2019;s disease detection for speech based on feature purification network</article-title><source>Front Public 
Health</source><year>2021</year><volume>9</volume><fpage>835960</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2021.835960</pub-id><pub-id pub-id-type="medline">35310782</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alkenani</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Q</given-names> </name></person-group><article-title>Predicting Alzheimer&#x2019;s disease from spoken and written language using fusion-based stacked generalization</article-title><source>J Biomed Inform</source><year>2021</year><month>06</month><volume>118</volume><fpage>103803</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2021.103803</pub-id><pub-id pub-id-type="medline">33965639</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Sarawgi</surname><given-names>U</given-names> </name><name name-style="western"><surname>Zulfikar</surname><given-names>W</given-names> </name><name name-style="western"><surname>Soliman</surname><given-names>N</given-names> </name><name name-style="western"><surname>Maes</surname><given-names>P</given-names> </name></person-group><article-title>Multimodal inductive transfer learning for detection of Alzheimer&#x2019;s dementia and its severity</article-title><access-date>2026-05-06</access-date><conf-name>Interspeech 2020</conf-name><conf-date>Oct 25-29, 2020</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2020">https://www.isca-archive.org/interspeech_2020</ext-link></comment><pub-id 
pub-id-type="doi">10.21437/Interspeech.2020-3137</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berisha</surname><given-names>V</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name><name name-style="western"><surname>LaCross</surname><given-names>A</given-names> </name><name name-style="western"><surname>Liss</surname><given-names>J</given-names> </name></person-group><article-title>Tracking discourse complexity preceding Alzheimer&#x2019;s disease diagnosis: a case study comparing the press conferences of Presidents Ronald Reagan and George Herbert Walker Bush</article-title><source>J Alzheimers Dis</source><year>2015</year><volume>45</volume><issue>3</issue><fpage>959</fpage><lpage>963</lpage><pub-id pub-id-type="doi">10.3233/JAD-142763</pub-id><pub-id pub-id-type="medline">25633673</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nasreen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rohanian</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hough</surname><given-names>J</given-names> </name><name name-style="western"><surname>Purver</surname><given-names>M</given-names> </name></person-group><article-title>Alzheimer&#x2019;s dementia recognition from spontaneous speech using disfluency and interactional features</article-title><source>Front Comput Sci</source><year>2021</year><volume>3</volume><fpage>640669</fpage><pub-id pub-id-type="doi">10.3389/fcomp.2021.640669</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fraser</surname><given-names>KC</given-names> 
</name><name name-style="western"><surname>Meltzer</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Rudzicz</surname><given-names>F</given-names> </name></person-group><article-title>Linguistic features identify Alzheimer&#x2019;s disease in narrative speech</article-title><source>J Alzheimers Dis</source><year>2016</year><volume>49</volume><issue>2</issue><fpage>407</fpage><lpage>422</lpage><pub-id pub-id-type="doi">10.3233/JAD-150520</pub-id><pub-id pub-id-type="medline">26484921</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Toledo</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Alu&#x00ED;sio</surname><given-names>SM</given-names> </name><name name-style="western"><surname>dos Santos</surname><given-names>LB</given-names> </name><etal/></person-group><article-title>Analysis of macrolinguistic aspects of narratives from individuals with Alzheimer&#x2019;s disease, mild cognitive impairment, and no cognitive impairment</article-title><source>Alzheimers Dement (Amst)</source><year>2018</year><month>01</month><volume>10</volume><issue>1</issue><fpage>31</fpage><lpage>40</lpage><pub-id pub-id-type="doi">10.1016/j.dadm.2017.08.005</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lindsay</surname><given-names>H</given-names> </name><name name-style="western"><surname>Tr&#x00F6;ger</surname><given-names>J</given-names> </name><name name-style="western"><surname>K&#x00F6;nig</surname><given-names>A</given-names> </name></person-group><article-title>Language impairment in Alzheimer&#x2019;s disease-robust and explainable evidence for AD-related deterioration of spontaneous speech through multilingual machine learning</article-title><source>Front Aging 
Neurosci</source><year>2021</year><volume>13</volume><fpage>642033</fpage><pub-id pub-id-type="doi">10.3389/fnagi.2021.642033</pub-id><pub-id pub-id-type="medline">34093165</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Karlekar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Niu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bansal</surname><given-names>M</given-names> </name></person-group><article-title>Detecting linguistic characteristics of Alzheimer&#x2019;s dementia by interpreting neural models</article-title><access-date>2026-05-06</access-date><conf-name>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers)</conf-name><conf-date>Jun 1-6, 2018</conf-date><comment><ext-link ext-link-type="uri" xlink:href="http://aclweb.org/anthology/N18-2">http://aclweb.org/anthology/N18-2</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/N18-2110</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="thesis"><person-group person-group-type="author"><name name-style="western"><surname>Kong</surname><given-names>W</given-names> </name></person-group><article-title>Exploring neural models for predicting dementia from language</article-title><year>2019</year><access-date>2026-05-23</access-date><publisher-name>University of British Columbia</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://open.library.ubc.ca/soa/cIRcle/collections/ubctheses/24/items/1.0380363">https://open.library.ubc.ca/soa/cIRcle/collections/ubctheses/24/items/1.0380363</ext-link></comment><pub-id pub-id-type="doi">10.14288/1.0380363</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation 
citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Wei</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tay</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bommasani</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Emergent abilities of large language models</article-title><source>Transactions on Machine Learning Research</source><year>2022</year><access-date>2026-05-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openreview.net/forum?id=yzkSU5zdwD">https://openreview.net/forum?id=yzkSU5zdwD</ext-link></comment></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="other"><person-group person-group-type="author"><collab>OpenAI</collab></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><access-date>2026-05-06</access-date><comment>Preprint posted online on Mar 4, 2024</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2303.08774">https://arxiv.org/abs/2303.08774</ext-link></comment></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Kingma</surname><given-names>DP</given-names> </name><name name-style="western"><surname>Ba</surname><given-names>J</given-names> </name></person-group><article-title>Adam: a method for stochastic optimization</article-title><source>arXiv</source><comment>Preprint posted online on Jan 30, 2017</comment><pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Grattafiori</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dubey</surname><given-names>A</given-names> 
</name><name name-style="western"><surname>Jauhri</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pandey</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kadian</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The Llama 3 herd of models</article-title><source>arXiv</source><access-date>2026-05-25</access-date><comment>Preprint posted online on Nov 23, 2024</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2407.21783">https://arxiv.org/abs/2407.21783</ext-link></comment></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Nussbaum</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Morris</surname><given-names>JX</given-names> </name><name name-style="western"><surname>Duderstadt</surname><given-names>B</given-names> </name><name name-style="western"><surname>Mulyar</surname><given-names>A</given-names> </name></person-group><article-title>Nomic embed: training a reproducible long context text embedder</article-title><source>arXiv</source><access-date>2026-05-06</access-date><comment>Preprint posted online on Feb 3, 2025</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2402.01613">https://arxiv.org/abs/2402.01613</ext-link></comment></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="other"><person-group person-group-type="author"><collab>Gemini Team</collab></person-group><article-title>Google DeepMind, Gemini: a family of highly capable multimodal models</article-title><source>arXiv</source><access-date>2026-05-06</access-date><comment>Preprint posted online on May 5, 2025</comment><comment><ext-link ext-link-type="uri" 
xlink:href="https://arxiv.org/abs/2312.11805">https://arxiv.org/abs/2312.11805</ext-link></comment></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Iter</surname><given-names>D</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>R</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>C</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Bouamor</surname><given-names>H</given-names> </name><name name-style="western"><surname>Pino</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bali</surname><given-names>K</given-names> </name></person-group><article-title>G-Eval: NLG evaluation using GPT-4 with better human alignment</article-title><access-date>2026-05-06</access-date><conf-name>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>Dec 6-10, 2023</conf-date><conf-loc>Singapore</conf-loc><fpage>2511</fpage><lpage>2522</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2023.emnlp-main">https://aclanthology.org/2023.emnlp-main</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2023.emnlp-main.153</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barredo Arrieta</surname><given-names>A</given-names> </name><name name-style="western"><surname>D&#x00ED;az-Rodr&#x00ED;guez</surname><given-names>N</given-names> </name><name 
name-style="western"><surname>Del Ser</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Explainable artificial intelligence (XAI): concepts, taxonomies, opportunities and challenges toward responsible AI</article-title><source>Information Fusion</source><year>2020</year><month>06</month><volume>58</volume><fpage>82</fpage><lpage>115</lpage><pub-id pub-id-type="doi">10.1016/j.inffus.2019.12.012</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ribeiro</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><article-title>&#x201C;Why should I trust you?&#x201D;: explaining the predictions of any classifier</article-title><access-date>2026-05-07</access-date><conf-name>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics</conf-name><conf-date>Aug 13, 2016</conf-date><conf-loc>San Diego, California</conf-loc><fpage>1135</fpage><lpage>1144</lpage><comment><ext-link ext-link-type="uri" xlink:href="http://aclweb.org/anthology/N16-3">http://aclweb.org/anthology/N16-3</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/N16-3020</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Lundberg</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SI</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Guyon</surname><given-names>I</given-names></name><name name-style="western"><surname>von Luxburg</surname><given-names>U</given-names> 
</name><name name-style="western"><surname>Bengio</surname><given-names>S</given-names></name><name name-style="western"><surname>Wallach</surname><given-names>H</given-names></name><name name-style="western"><surname>Fergus</surname><given-names>R</given-names></name><name name-style="western"><surname>Vishwanathan</surname><given-names>SVN</given-names></name></person-group><article-title>A unified approach to interpreting model predictions</article-title><source>Adv Neural Inf Process Syst</source><year>2017</year><access-date>2026-05-25</access-date><volume>30</volume><fpage>4765</fpage><lpage>4774</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions">https://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions</ext-link></comment></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holzinger</surname><given-names>A</given-names> </name><name name-style="western"><surname>Langs</surname><given-names>G</given-names> </name><name name-style="western"><surname>Denk</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zatloukal</surname><given-names>K</given-names> </name><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>H</given-names> </name></person-group><article-title>Causability and explainability of artificial intelligence in medicine</article-title><source>Wiley Interdiscip Rev Data Min Knowl Discov</source><year>2019</year><volume>9</volume><issue>4</issue><fpage>e1312</fpage><pub-id pub-id-type="doi">10.1002/widm.1312</pub-id><pub-id pub-id-type="medline">32089788</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name 
name-style="western"><surname>Tonekaboni</surname><given-names>S</given-names> </name><name name-style="western"><surname>Joshi</surname><given-names>S</given-names> </name><name name-style="western"><surname>McCradden</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Goldenberg</surname><given-names>A</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Doshi-Velez</surname><given-names>F</given-names> </name><name name-style="western"><surname>Fackler</surname><given-names>J</given-names></name><name name-style="western"><surname>Jung</surname><given-names>K</given-names></name></person-group><article-title>What clinicians want: contextualizing explainable machine learning for clinical end use</article-title><access-date>2026-05-25</access-date><conf-name>Proceedings of the 4th Machine Learning for Healthcare Conference</conf-name><conf-date>Aug 9-10, 2019</conf-date><conf-loc>Ann Arbor, Michigan</conf-loc><fpage>359</fpage><lpage>380</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://proceedings.mlr.press/v106/tonekaboni19a.html">https://proceedings.mlr.press/v106/tonekaboni19a.html</ext-link></comment></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kahneman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sibony</surname><given-names>O</given-names> </name><name name-style="western"><surname>Sunstein</surname><given-names>CR</given-names> </name></person-group><source>Noise: A Flaw in Human Judgment</source><year>2021</year><access-date>2026-05-25</access-date><publisher-name>Little, Brown Spark</publisher-name><fpage>1</fpage><lpage>454</lpage><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.hachettebookgroup.com/titles/daniel-kahneman/noise/9780316451406/?">https://www.hachettebookgroup.com/titles/daniel-kahneman/noise/9780316451406/?</ext-link></comment><pub-id pub-id-type="other">9780316451406</pub-id></nlm-citation></ref><ref id="ref73"><label>73</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Bavaresco</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bernardi</surname><given-names>R</given-names> </name><name name-style="western"><surname>Bertolazzi</surname><given-names>L</given-names> </name><etal/></person-group><article-title>LLMs instead of human judges? a large scale empirical study across 20 NLP evaluation tasks</article-title><access-date>2026-05-06</access-date><conf-name>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2)</conf-name><conf-date>Jul 27 to Aug 1, 2025</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2025.acl-short">https://aclanthology.org/2025.acl-short</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2025.acl-short.20</pub-id></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Van Den Burg</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Suzuki</surname><given-names>G</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Sensoy</surname><given-names>M</given-names> </name></person-group><article-title>Aligning black-box language models with human judgments</article-title><access-date>2026-05-06</access-date><conf-name>Findings of the Association for Computational Linguistics</conf-name><conf-date>Apr 29 to May 4, 2025</conf-date><comment><ext-link 
ext-link-type="uri" xlink:href="https://aclanthology.org/2025.findings-naacl">https://aclanthology.org/2025.findings-naacl</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2025.findings-naacl.376</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>R Core Team</collab></person-group><article-title>R: a language and environment for statistical computing</article-title><source>R Foundation for Statistical Computing</source><year>2024</year><access-date>2026-05-07</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/">https://www.R-project.org/</ext-link></comment></nlm-citation></ref><ref id="ref76"><label>76</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Proctor</surname><given-names>L</given-names> </name><name name-style="western"><surname>Collier</surname><given-names>PN</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>X</given-names> </name></person-group><article-title>Automatic diagnosis and prediction of cognitive decline associated with Alzheimer&#x2019;s dementia through spontaneous speech</article-title><conf-name>2021 IEEE International Conference on Signal and Image Processing Applications (ICSIPA)</conf-name><conf-date>Sep 13-15, 2021</conf-date><conf-loc>Kuala Terengganu, Malaysia</conf-loc><fpage>39</fpage><lpage>43</lpage><pub-id pub-id-type="doi">10.1109/ICSIPA52582.2021.9576784</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Large language model prompt templates for speaker attribution and linguistic feature analysis.</p><media xlink:href="medinform_v14i1e86965_app1.docx" xlink:title="DOCX File, 18 
KB"/></supplementary-material></app-group></back></article>