<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e78041</article-id><article-id pub-id-type="doi">10.2196/78041</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Trends and Trajectories in the Rise of Large Language Models in Radiology: Scoping Review</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Al Zaabi</surname><given-names>Adhari</given-names></name><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Alshibli</surname><given-names>Rashid</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>AlAmri</surname><given-names>Abdullah</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>AlRuheili</surname><given-names>Ibrahim</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lutfi</surname><given-names>Syaheerah Lebai</given-names></name><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Human and Clinical Anatomy Department, College of Medicine and Health Sciences, Sultan Qaboos University</institution><addr-line>P.O. Box 35, Al Khodh</addr-line><addr-line>Muscat</addr-line><country>Oman</country></aff><aff id="aff2"><institution>College of Medicine and Health Sciences, Sultan Qaboos University</institution><addr-line>Muscat</addr-line><country>Oman</country></aff><aff id="aff3"><institution>Medical Education and Informatics Department, College of Medicine and Health Sciences, Sultan Qaboos University</institution><addr-line>Muscat</addr-line><country>Oman</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Reichenpfader</surname><given-names>Daniel</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Zhang</surname><given-names>Jun</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Adhari Al Zaabi, Human and Clinical Anatomy Department, College of Medicine and Health Sciences, Sultan Qaboos University, P.O. 
Box 35, Al Khodh, Muscat, 123, Oman; <email>adhari@squ.edu.om</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>9</day><month>12</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e78041</elocation-id><history><date date-type="received"><day>25</day><month>05</month><year>2025</year></date><date date-type="rev-recd"><day>30</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>31</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Adhari Al Zaabi, Rashid Alshibli, Abdullah AlAmri, Ibrahim AlRuheili, Syaheerah Lebai Lutfi. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 9.12.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e78041"/><abstract><sec><title>Background</title><p>The use of large language models (LLMs) in radiology is expanding rapidly, offering new possibilities in report generation, decision support, and workflow optimization. However, a comprehensive evaluation of their applications, performance, and limitations across the radiology domain remains limited.</p></sec><sec><title>Objective</title><p>This review aimed to map current applications of LLMs in radiology, evaluate their performance across key tasks, and identify prevailing limitations and directions for future research.</p></sec><sec sec-type="methods"><title>Methods</title><p>A scoping review was conducted in accordance with the Arksey and O&#x2019;Malley framework and the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) guidelines. Three databases&#x2014;PubMed, Scopus, and IEEE Xplore&#x2014;were searched for peer-reviewed studies published between January 2022 and December 2024. Eligible studies included empirical evaluations of LLMs applied to radiological data or workflows. Commentaries, reviews, and technical model proposals without evaluation were excluded. Two reviewers independently screened studies and extracted data on study characteristics, LLM type, radiological use case, data modality, and evaluation metrics. A thematic synthesis was used to identify key domains of application. No formal risk-of-bias assessment was performed, but a narrative appraisal of dataset representativeness and study quality was included.</p></sec><sec sec-type="results"><title>Results</title><p>A total of 67 studies were included. 
GPT-4 was the most frequently used model (n=28, 42%), with text-based corpora as the primary type of data used (n=43, 64%). Identified use cases fell into three thematic domains: (1) decision support (n=39, 58%), (2) report generation and summarization (n=16, 24%), and (3) workflow optimization (n=12, 18%). While LLMs demonstrated strong performance in structured-text tasks (eg, report simplification with &#x003E;94% accuracy), diagnostic performance varied widely (16%-86%) and was limited by dataset bias, lack of fine-tuning, and minimal clinical validation. Most studies (n=53, 79.1%) had single-center, proof-of-concept designs with limited generalizability.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>LLMs show strong potential for augmenting radiological workflows, particularly for structured reporting, summarization, and educational tasks. However, their diagnostic performance remains inconsistent, and current implementations lack robust external validation. Future work should prioritize prospective, multicenter validation of domain-adapted and multimodal models to support safe clinical integration.</p></sec></abstract><kwd-group><kwd>large language models</kwd><kwd>GPT-4</kwd><kwd>scoping review</kwd><kwd>natural language processing</kwd><kwd>report generation</kwd><kwd>clinical decision support</kwd><kwd>workflow optimization</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>radiology</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The integration of artificial intelligence (AI) into health care has accelerated over the past decade, with large language models (LLMs) emerging as transformative tools for natural language processing in clinical contexts. Built on transformer architectures, models such as GPT-4, bidirectional encoder representations from transformers (BERT), and Text-to-Text Transfer Transformer (T5) have demonstrated high performance in text-based tasks such as summarization, classification, and information extraction across general and clinical domains [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>Radiology is inherently data intensive and text rich, making it an ideal domain for the application of LLMs. These models can support a wide range of tasks, including automated report generation, structured documentation, code assignment, and even preliminary diagnostic reasoning from clinical narratives [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Despite the growing number of pilot studies, there is no unified synthesis evaluating the practical effectiveness, integration readiness, and safety implications of LLMs in real-world radiology settings.</p><p>Several prior scoping reviews have investigated the use of LLMs in radiology, but these have typically focused on specific application domains. For example, Reichenpfader et al [<xref ref-type="bibr" rid="ref6">6</xref>] conducted a scoping review focused exclusively on information extraction from radiology reports. Their analysis highlighted that most approaches relied on encoder-based transformer models such as BERT, that datasets were often small and single center, and that performance varied substantially by annotation quality and task definition. They concluded that, while information extraction is promising, generalizability and external validation are lacking [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. 
Busch et al [<xref ref-type="bibr" rid="ref8">8</xref>] conducted a narrative overview of approximately 10 studies specifically addressing structured reporting in radiology. They emphasized the potential of GPT-3.5 and GPT-4 to transform free text into structured templates and discussed opportunities for multilingual structured reporting adoption. Their analysis was conceptual, with limited systematic synthesis across tasks. Nakaura et al [<xref ref-type="bibr" rid="ref9">9</xref>] traced the evolution of deep learning and transformer architectures in radiology; explained key limitations such as hallucinations, bias, and lack of explainability; and emphasized the risks of premature deployment in clinical decision support. Their review highlighted proof-of-concept applications, including report generation, translation of radiology reports into plain language, exam preparation, and early feasibility of protocol selection and research support [<xref ref-type="bibr" rid="ref9">9</xref>].</p><p>Unlike these prior reviews that were narrowly focused on single use cases (information extraction or patient-facing report simplification), our study systematically mapped the full spectrum of LLM applications across radiology&#x2014;including decision support, report generation, workflow optimization, and education. Furthermore, our work integrated both generative and nongenerative transformer models, multimodal applications, and educational and operational use cases. This broader lens allowed us to identify converging themes; quantify distribution across modalities; and highlight gaps in validation, equity, and clinical integration. Accordingly, this review aimed to systematically map the applications of LLMs in radiology; evaluate their reported outcomes; and provide a thematic synthesis of emerging use cases, methodological trends, and future research priorities.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>This scoping review was conducted in accordance with the methodological framework proposed by Arksey and O&#x2019;Malley [<xref ref-type="bibr" rid="ref10">10</xref>] and adhered to the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) checklist (<xref ref-type="supplementary-material" rid="app7">Checklist 1</xref>) to ensure methodological transparency and reproducibility.</p></sec><sec id="s2-2"><title>Eligibility Criteria (PICOS-Based)</title><p>Eligibility criteria were defined using the population, intervention, comparator, outcomes, and study design (PICOS) framework (<xref ref-type="table" rid="table1">Table 1</xref>). We included peer-reviewed empirical studies evaluating LLM applications in radiology workflows using models such as GPT-3 and GPT-4, BERT, or domain-specific transformers. Reviews, opinion pieces, and conference abstracts were excluded. 
Only English-language studies published between January 2022 and December 2024 were included due to resource limitations, which we acknowledge may restrict the generalizability of the findings.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Eligibility criteria for study selection structured using the PICOS framework (population, intervention, comparator, outcomes, study design) with additional filtering criteria related to language and publication date.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">PICOS domain or criterion</td><td align="left" valign="bottom">Inclusion criteria</td><td align="left" valign="bottom">Exclusion criteria</td></tr></thead><tbody><tr><td align="left" valign="top">Population</td><td align="left" valign="top">Studies involving radiology professionals, radiological workflows, or radiology-related data</td><td align="left" valign="top">Studies unrelated to radiology or without reference to radiological applications</td></tr><tr><td align="left" valign="top">Intervention</td><td align="left" valign="top">Use or evaluation of LLMs<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, including GPT-3 and GPT-4, BERT<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup>, or custom transformer models</td><td align="left" valign="top">Studies using general AI<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup> models without a language modeling component</td></tr><tr><td align="left" valign="top">Comparator</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Outcomes</td><td align="left" valign="top">Reported outcomes related to LLM performance, feasibility, integration, or limitations in radiology</td><td align="left" valign="top">Studies lacking outcome data or reporting only theoretical frameworks without application</td></tr><tr><td align="left" valign="top">Study design</td><td align="left" valign="top">Peer-reviewed empirical studies (qualitative, quantitative, or mixed methods)</td><td align="left" valign="top">Reviews, editorials, opinion pieces, letters, and conference abstracts</td></tr><tr><td align="left" valign="top">Language</td><td align="left" valign="top">English</td><td align="left" valign="top">Non-English</td></tr><tr><td align="left" valign="top">Publication date</td><td align="left" valign="top">Published between January 2022 and December 2024</td><td align="left" valign="top">Published before 2022 or after December 2024</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>LLM: large language model.</p></fn><fn id="table1fn2"><p><sup>b</sup>BERT: bidirectional encoder representations from transformers.</p></fn><fn id="table1fn3"><p><sup>c</sup>AI: artificial intelligence.</p></fn><fn id="table1fn4"><p><sup>d</sup>Not applicable (scoping review design).</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-3"><title>Information Sources and Search Strategy</title><p>The databases were selected to ensure coverage across clinical (PubMed), multidisciplinary (Scopus), and technical and engineering (IEEE Xplore) domains. 
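</p><p>In simplified form, the Boolean pattern shared across the 3 databases can be assembled as follows. This is a sketch only, built from the term blocks reported below; the exact, syntax-adjusted strings for each database are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>:</p><preformat># Illustrative reconstruction of the shared query pattern (not the verbatim
# database-specific strings, which are given in Multimedia Appendix 1).
llm_terms = ['"large language model"', '"GPT"', '"BERT"', '"transformer-based AI"']
radiology_terms = ['"radiology"', '"medical imaging"', '"diagnostic imaging"']

query = "({}) AND ({})".format(" OR ".join(llm_terms), " OR ".join(radiology_terms))
print(query)
# ("large language model" OR "GPT" OR "BERT" OR "transformer-based AI")
# AND ("radiology" OR "medical imaging" OR "diagnostic imaging")</preformat><p>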
The search combined MeSH (Medical Subject Headings) and free-text terms related to LLMs (&#x201C;large language model,&#x201D; &#x201C;GPT,&#x201D; &#x201C;BERT,&#x201D; and &#x201C;transformer-based AI&#x201D;) and radiology (&#x201C;radiology,&#x201D; &#x201C;medical imaging,&#x201D; and &#x201C;diagnostic imaging&#x201D;).</p><p>Database-specific search strings tailored to syntax and operators are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Gray literature (eg, arXiv and medRxiv) and conference proceedings were excluded, which may have limited capture of emerging non&#x2013;peer-reviewed work. Furthermore, the use of MeSH terms in PubMed was optimized but may not have fully captured all relevant variations due to evolving terminology in this rapidly developing field. These limitations may have affected the comprehensiveness of the search and should be considered when interpreting the findings.</p></sec><sec id="s2-4"><title>Study Selection</title><p>All retrieved records were imported into Rayyan [<xref ref-type="bibr" rid="ref11">11</xref>] (Qatar Computing Research Institute), a web-based tool designed to facilitate systematic and scoping review workflows. Rayyan facilitated duplicate removal and blinded screening. Two reviewers (AA and IR) independently screened titles and abstracts and assessed full texts against the eligibility criteria. Disagreements were resolved through consensus or, if needed, by a third reviewer (RS). To ensure calibration, an initial pilot screening was conducted, and a random 20% sample of the included studies was cross-checked. The study selection process is presented in the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) 2020 flow diagram.</p></sec><sec id="s2-5"><title>Data Extraction Strategy</title><p>A structured data extraction form was developed and piloted on a sample of 5 studies. The following data were collected:</p><list list-type="bullet"><list-item><p>Publication details (year, country, and journal)</p></list-item><list-item><p>LLM type (eg, GPT-3.5, GPT-4, BERT, or domain-specific models)</p></list-item><list-item><p>Radiology use case (eg, classification, report generation, decision support)</p></list-item><list-item><p>Data modalities (text, images, multimodal, or radiology information systems [RISs])</p></list-item><list-item><p>Evaluation metrics (eg, accuracy, bilingual evaluation understudy [BLEU], recall-oriented understudy for gisting evaluation [ROUGE], Matthews correlation coefficient, area under the curve, and <italic>F</italic><sub>1</sub>-score)</p></list-item><list-item><p>Dataset characteristics (size, source, and multicenter vs single center)</p></list-item><list-item><p>Reported outcomes and limitations</p></list-item></list><p>Data extraction was conducted independently by 2 reviewers. A random 20% subset was cross-checked for accuracy, with discrepancies resolved through consensus.</p></sec><sec id="s2-6"><title>Secondary Data Extraction and Thematic Classification</title><sec id="s2-6-1"><title>Data Extraction and Coding</title><p>A hybrid thematic analysis was conducted. Initially, themes were extracted manually by 3 independent raters who analyzed and categorized the data. An interrater reliability measure (percentage of agreement) was applied to ensure consistency across raters. Subsequently, GPT-4 was used to assist with clustering recurring patterns using a zero-shot prompt. 
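</p><p>To illustrate these two steps concretely, the following is a minimal sketch only, assuming plain Python for the percentage-of-agreement check and the openai package for the zero-shot call; the function names are hypothetical, and the actual analysis supplied an Excel file of raw data to GPT-4 rather than scripting the API:</p><preformat># Sketch only: percentage agreement across raters plus a zero-shot GPT-4 call.
# Assumes the openai package; the study itself supplied an Excel file to GPT-4.
from openai import OpenAI

def percent_agreement(labels_by_rater):
    """Share of items for which all raters assigned the same code."""
    items = list(zip(*labels_by_rater))  # one tuple of rater labels per item
    return sum(len(set(item)) == 1 for item in items) / len(items)

client = OpenAI()  # requires OPENAI_API_KEY in the environment

def zero_shot_codes(abstract_text):
    # The exact prompt wording used in this review is quoted in the text below.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user",
                   "content": "Act as a pseudo analyst, read this abstract, "
                              "and label it with relevant codes. Provide a "
                              "summary of recurring themes.\n\n" + abstract_text}],
    )
    return response.choices[0].message.content</preformat><p>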
The prompt applied was as follows: &#x201C;Act as a pseudo analyst, read this file (Excel file with the raw data), and label abstracts with relevant codes. Provide a summary of recurring themes.&#x201D;</p><p>The outputs generated by GPT-4 were then compared and triangulated with the manually derived results by an additional expert reviewer, who was provided with (1) the original raw Microsoft Excel file and (2) GPT-4&#x2019;s preliminary coding and theme map. Discrepancies between manual and AI-assisted outputs were discussed in a consensus meeting, and revisions were made to finalize the thematic framework.</p><p>It should be noted that GPT-4 was not used during the initial manual theme extraction, which was conducted independently by the 3 student raters. The use of GPT-4 in the subsequent phase was intended to support rather than replace human analytical judgment and ensure that AI-generated outputs were critically appraised before integration.</p></sec><sec id="s2-6-2"><title>Theme Development</title><p>Through inductive synthesis, the extracted codes were grouped into broader categories that reflected the primary ways in which LLMs are currently being explored in radiology. After multiple rounds of refinement, three overarching themes were established: (1) decision support, including diagnostic support, case prioritization, and aiding clinical judgments; (2) report generation, encompassing drafting, summarization, and improving clarity or standardization of radiology reports; and (3) workflow optimization, referring to efficiency gains such as automating routine tasks, assisting communication, and integrating radiology processes into clinical workflows.</p><p>This thematic classification was not predetermined but derived from recurring patterns across the reviewed material. GPT-4 was used as a supporting tool to enhance coding efficiency and cross-check clustering of concepts, whereas the final themes were reviewed, validated, and confirmed manually by the research team.</p><p>By systematically identifying and categorizing these themes, the analysis provided a structured synthesis of the literature while ensuring methodological transparency and reproducibility.</p></sec></sec><sec id="s2-7"><title>Narrative Quality Assessment</title><p>Although a formal risk-of-bias assessment was not performed in accordance with scoping review methodology, a narrative appraisal revealed several recurring limitations in the included studies. Many were small-scale, single-institution implementations or proof-of-concept projects, with limited external validation. Most lacked robust methodological descriptions or standardized evaluation metrics, making cross-study comparisons challenging.</p><p>In terms of dataset size and representativeness, several studies relied on relatively small or synthetic datasets, often drawn from publicly available repositories rather than real-world clinical systems. This raises concerns about generalizability. Geographically, a substantial proportion of the studies originated from North America, Europe, and China, indicating potential regional bias in the development and evaluation of LLMs for radiology. There was limited representation from low- and middle-income countries, which may affect the global applicability of the findings.</p></sec><sec id="s2-8"><title>Critical Reflection on Methodology</title><p>This review used a rigorous and transparent methodology; however, certain limitations must be acknowledged. 
Restriction to English-language studies and the exclusion of gray literature may have limited comprehensiveness. The fast pace of LLM development also means that new studies may have emerged since the search was conducted. Finally, thematic synthesis, while appropriate for mapping breadth, is interpretive and may introduce subjectivity despite the use of calibration and consensus procedures.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview of the Included Studies</title><p>A total of 1111 records were retrieved from Scopus (n=407, 36.6%), PubMed (n=568, 51.1%), and IEEE Xplore (n=136, 12.2%). Of these 1111 records, after removing 535 (48.2%) duplicates and 18 (1.6%) irrelevant records, 558 (50.2%) studies remained. Following title and abstract screening, 163 full-text articles were reviewed, and 67 (41.1%) met the inclusion criteria (<xref ref-type="fig" rid="figure1">Figure 1</xref>). A summary of all included articles is presented in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) 2020 flow diagram detailing the study selection process for the included records across databases. LLM: large language model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e78041_fig01.png"/></fig><p>Most studies (44/67, 65.7%) were published in 2024, reflecting a sharp rise in interest following the release of GPT-4 in March 2023 (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Geographically, the United States contributed the most studies (24/67, 35.8%), followed by Japan (10/67, 14.9%) and Germany (10/67, 14.9%). Very few studies originated from low- and middle-income countries, and a few studies assessed non&#x2013;English-language corpora (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Annual and cumulative number of publications applying large language models in radiology (2020&#x2010;2024). Data derived from the included studies (N=67). Milestones for the release of GPT-3 (June 2020) and GPT-4 (March 2023) are annotated.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e78041_fig02.png"/></fig></sec><sec id="s3-2"><title>Types of LLMs and Implementation Approaches</title><p>GPT-4 was the most frequently studied model (28/67, 42%), followed by GPT-3.5 (14/67, 21%). A smaller proportion used BERT-based models such as CheXbert and BioBERT or domain-specific variants, including Radiology-Llama2 and RadSpaT5. Multimodal models capable of integrating text and images were reported in 17.9% (12/67) of the studies, although few underwent clinical validation.</p><p>Regarding input data, 64% (43/67) of the studies used text-based corpora such as radiology reports, request forms, or quizzes; 15% (10/67) analyzed images; 18% (12/67) used multimodal datasets; and 3% (2/67) used either RIS data or exam question datasets (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). 
Of the 67 studies, 56 (84%) used English-language corpora (English language only: n=50, 89%; mixed English+another language: n=6, 11%), and 11 (16%) used only corpora in non-English languages (German: n=4, 36%; Japanese: n=4, 36%; Italian: n=2, 18%; French: n=1, 9%). Most studies (53/67, 79%) were single center, whereas 21% (14/67) were multicenter.</p></sec><sec id="s3-3"><title>Imaging Modalities and Radiological Subspecialties</title><p>Imaging modality use varied across the studies (<xref ref-type="fig" rid="figure3">Figures 3</xref> and <xref ref-type="fig" rid="figure4">4</xref>). <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> shows the distribution of the 67 studies across various radiology subspecialties. The most represented field was thoracic imaging with 24% (16/67) of the studies, followed by general radiology (13/67, 19%) and oncologic imaging (11/67, 16%).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Imaging modalities used, stratified by data type (N=67). Most studies relied on text-only data (yellow), with fewer using image-only (blue) or multimodal text+image (green) datasets. CT: computed tomography; MRI: magnetic resonance imaging; PET: positron emission tomography.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e78041_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Imaging modality by study objective (N=67). Decision support (yellow) predominated, followed by report generation (blue) and workflow optimization (green). Positron emission tomography (PET) and ultrasound were the least represented. CT: computed tomography; MRI: magnetic resonance imaging.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e78041_fig04.png"/></fig></sec><sec id="s3-4"><title>Thematic Domains of Application</title><sec id="s3-4-1"><title>Overview</title><p><xref ref-type="table" rid="table2">Table 2</xref> shows the 3 thematic domains that emerged (detailed thematic domains and models are presented in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Thematic classification of large language model applications in radiology across the 67 included studies (2022&#x2010;2024).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Theme and subtheme</td><td align="left" valign="bottom">Articles</td></tr><tr><td align="left" valign="bottom" colspan="2">Decision support</td></tr></thead><tbody><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Classification</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Blankemeier et al [<xref ref-type="bibr" rid="ref12">12</xref>]</p></list-item><list-item><p>Chambon et al [<xref ref-type="bibr" rid="ref13">13</xref>]</p></list-item><list-item><p>Fervers et al [<xref ref-type="bibr" rid="ref14">14</xref>]</p></list-item><list-item><p>Haver et al [<xref ref-type="bibr" rid="ref15">15</xref>]</p></list-item><list-item><p>Olivato et al [<xref ref-type="bibr" rid="ref16">16</xref>]</p></list-item><list-item><p>Putelli et al [<xref ref-type="bibr" rid="ref17">17</xref>]</p></list-item><list-item><p>Santos et al [<xref ref-type="bibr" 
rid="ref18">18</xref>]</p></list-item><list-item><p>Sehanobish et al [<xref ref-type="bibr" rid="ref19">19</xref>]</p></list-item><list-item><p>Suzuki et al [<xref ref-type="bibr" rid="ref20">20</xref>]</p></list-item><list-item><p>Wu et al [<xref ref-type="bibr" rid="ref21">21</xref>]</p></list-item><list-item><p>Zhang et al [<xref ref-type="bibr" rid="ref22">22</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diagnosis from clinical cases</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Danu et al [<xref ref-type="bibr" rid="ref23">23</xref>]</p></list-item><list-item><p>Horiuchi et al [<xref ref-type="bibr" rid="ref24">24</xref>]</p></list-item><list-item><p>Horiuchi et al [<xref ref-type="bibr" rid="ref25">25</xref>]</p></list-item><list-item><p>Kurokawa et al [<xref ref-type="bibr" rid="ref26">26</xref>]</p></list-item><list-item><p>Wada et al [<xref ref-type="bibr" rid="ref27">27</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diagnosis from images</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Elek et al [<xref ref-type="bibr" rid="ref28">28</xref>]</p></list-item><list-item><p>Khare et al [<xref ref-type="bibr" rid="ref29">29</xref>]</p></list-item><list-item><p>Pachade et al [<xref ref-type="bibr" rid="ref30">30</xref>]</p></list-item><list-item><p>Busch et al [<xref ref-type="bibr" rid="ref31">31</xref>]</p></list-item><list-item><p>Silva et al [<xref ref-type="bibr" rid="ref32">32</xref>]</p></list-item><list-item><p>Wu et al [<xref ref-type="bibr" rid="ref33">33</xref>]</p></list-item><list-item><p>Kottlors et al [<xref ref-type="bibr" rid="ref34">34</xref>]</p></list-item><list-item><p>Overgaard Olesen et al [<xref ref-type="bibr" rid="ref35">35</xref>]</p></list-item><list-item><p>Lee et al [<xref ref-type="bibr" rid="ref36">36</xref>]</p></list-item><list-item><p>Reith et al [<xref ref-type="bibr" rid="ref37">37</xref>]</p></list-item><list-item><p>Horiuchi et al [<xref ref-type="bibr" rid="ref38">38</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Extracting information from reports</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Mukherjee et al [<xref ref-type="bibr" rid="ref39">39</xref>]</p></list-item><list-item><p>Bressem et al [<xref ref-type="bibr" rid="ref4">4</xref>]</p></list-item><list-item><p>Tan et al [<xref ref-type="bibr" rid="ref40">40</xref>]</p></list-item><list-item><p>Tay et al [<xref ref-type="bibr" rid="ref41">41</xref>]</p></list-item><list-item><p>Russe et al [<xref ref-type="bibr" rid="ref42">42</xref>]</p></list-item><list-item><p>Le Guellec et al [<xref ref-type="bibr" rid="ref43">43</xref>]</p></list-item><list-item><p>Lybarger et al [<xref ref-type="bibr" rid="ref44">44</xref>]</p></list-item><list-item><p>Dada et al [<xref ref-type="bibr" rid="ref45">45</xref>]</p></list-item><list-item><p>Sun et al [<xref ref-type="bibr" rid="ref46">46</xref>]</p></list-item><list-item><p>Bhayana et al [<xref ref-type="bibr" rid="ref47">47</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Summarization</td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>Wu and Bibault [<xref ref-type="bibr" rid="ref48">48</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="2">Report generation</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Generating the report</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Danu et al [<xref ref-type="bibr" rid="ref49">49</xref>]</p></list-item><list-item><p>Hasani et al [<xref ref-type="bibr" rid="ref50">50</xref>]</p></list-item><list-item><p>Ji et al [<xref ref-type="bibr" rid="ref51">51</xref>]</p></list-item><list-item><p>L&#x00F3;pez-&#x00DA;beda et al [<xref ref-type="bibr" rid="ref52">52</xref>]</p></list-item><list-item><p>Mallio et al [<xref ref-type="bibr" rid="ref53">53</xref>]</p></list-item><list-item><p>Moezzi et al [<xref ref-type="bibr" rid="ref54">54</xref>]</p></list-item><list-item><p>Nakaura et al [<xref ref-type="bibr" rid="ref55">55</xref>]</p></list-item><list-item><p>Selivanov et al [<xref ref-type="bibr" rid="ref56">56</xref>]</p></list-item><list-item><p>Shentu and Al Moubayed [<xref ref-type="bibr" rid="ref57">57</xref>]</p></list-item><list-item><p>Soleimani et al [<xref ref-type="bibr" rid="ref58">58</xref>]</p></list-item><list-item><p>Wo&#x017A;nicki et al [<xref ref-type="bibr" rid="ref59">59</xref>]</p></list-item><list-item><p>Wu et al [<xref ref-type="bibr" rid="ref60">60</xref>]</p></list-item><list-item><p>Bhayana et al [<xref ref-type="bibr" rid="ref61">61</xref>]</p></list-item><list-item><p>Tie et al [<xref ref-type="bibr" rid="ref62">62</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Summarization</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Karn et al [<xref ref-type="bibr" rid="ref63">63</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Quality of complex reports</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Zhu et al [<xref ref-type="bibr" rid="ref64">64</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="2">Workflow optimization</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Selecting appropriate modality from radiology order</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Gertz et al [<xref ref-type="bibr" rid="ref65">65</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Image quality</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Chen et al [<xref ref-type="bibr" rid="ref66">66</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Exam questions</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Mistry et al [<xref ref-type="bibr" rid="ref67">67</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Summarization</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Nishio et al [<xref ref-type="bibr" 
rid="ref68">68</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Classification</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Yasaka et al [<xref ref-type="bibr" rid="ref69">69</xref>]</p></list-item><list-item><p>Huemann et al [<xref ref-type="bibr" rid="ref70">70</xref>]</p></list-item><list-item><p>Kanzawa et al [<xref ref-type="bibr" rid="ref71">71</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>User interface improvement</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Zhang et al [<xref ref-type="bibr" rid="ref72">72</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Identification of reports containing recommendations</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Abbasi et al [<xref ref-type="bibr" rid="ref73">73</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Detection of errors</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Kathait et al [<xref ref-type="bibr" rid="ref74">74</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Simplification of reports for patients</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Sarangi et al [<xref ref-type="bibr" rid="ref75">75</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Answering patient questions</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Rogasch et al [<xref ref-type="bibr" rid="ref76">76</xref>]</p></list-item></list></td></tr></tbody></table></table-wrap></sec><sec id="s3-4-2"><title>Theme 1: AI-Assisted Clinical Decision Support</title><p>Four subthemes emerged from this theme.</p><sec id="s3-4-2-1"><title>Classification Tasks</title><p>Across radiology classification tasks, domain-tuned transformers remained the most reliable, whereas general LLMs were mixed. BERT-style models standardized the Thyroid Imaging Reporting and Data System and matched or exceeded radiologists for chest x-ray report extraction [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], with added interpretability and effectiveness in Italian reports [<xref ref-type="bibr" rid="ref17">17</xref>]. GPT-3.5 and GPT-4 underperformed or were inconsistent for the Liver Imaging Reporting and Data System and tumor node metastasis staging [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref20">20</xref>], although structured Reporting and Data System categorization showed promise [<xref ref-type="bibr" rid="ref21">21</xref>]. The multimodal GPT-4V struggled to describe Breast Imaging Reporting and Data System features [<xref ref-type="bibr" rid="ref15">15</xref>], whereas specialized models such as RadBERT and a 3D vision language model (Merlin) achieved strong document-level COVID-19 classification and surpassed other models. 
Overall, BERT-family and domain-adapted approaches are currently more dependable than generic LLMs for clinical deployment.</p></sec><sec id="s3-4-2-2"><title>Diagnosis From Clinical Cases</title><p>Across clinical case diagnosis, general LLMs remained inconsistent and typically trailed expert radiologists. GPT-4 reached approximately 50% overall accuracy on neuroradiology cases of the week, performing far worse on central nervous system tumors (16%) than on non&#x2013;central nervous system tumors (62%) [<xref ref-type="bibr" rid="ref23">23</xref>]. In musculoskeletal cases, text-only GPT-4 was roughly at the resident level but below board-certified radiologists, whereas GPT-4V lagged further [<xref ref-type="bibr" rid="ref23">23</xref>]. On challenging cases from the Freiburg Neuropathology Case Conference, both GPT-4 and GPT-4V underperformed compared to radiologists [<xref ref-type="bibr" rid="ref24">24</xref>]. Among Anthropic models, Claude 3.5 Sonnet outperformed Claude 3 Opus, with accuracy improving when both clinical history and imaging were provided, yet differential diagnosis listing remained limited [<xref ref-type="bibr" rid="ref26">26</xref>]. Targeted prompt engineering and confidence thresholds measurably boosted GPT-4 Turbo&#x2019;s diagnostic accuracy, highlighting the value of workflow tuning [<xref ref-type="bibr" rid="ref27">27</xref>].</p></sec><sec id="s3-4-2-3"><title>Diagnosis From Images</title><p>General LLMs were promising but not yet dependable. GPT-4 (via Bing) was able to recognize basic computed tomography (CT) and magnetic resonance imaging (MRI) features but lacked diagnostic reliability [<xref ref-type="bibr" rid="ref28">28</xref>]. Multimodal and domain-tuned models fared better: mmBERT set a new visual question answering state of the art with interpretable attention maps [<xref ref-type="bibr" rid="ref29">29</xref>], and self-supervised Contrastive Language-Image Pretraining improved large-vessel occlusion detection over supervised baselines [<xref ref-type="bibr" rid="ref30">30</xref>]. GPT-4V showed potential across subspecialties but should complement clinicians, not replace them [<xref ref-type="bibr" rid="ref31">31</xref>], and GPT-3.5 showed variable accuracy and should be considered as supplementary&#x2014;not stand-alone&#x2014;for dental panoramic radiographs [<xref ref-type="bibr" rid="ref32">32</xref>]. Pairing LLMs with image-to-text modules boosted diagnostic performance in thyroid ultrasound [<xref ref-type="bibr" rid="ref33">33</xref>]. For differential diagnosis, GPT-4 reached 68.8% concordance with experts (93.8% of outputs were acceptable), with best results in neuroradiology and chest x-rays, yet task performance varied [<xref ref-type="bibr" rid="ref34">34</xref>] and remained limited for specific findings such as pulmonary congestion [<xref ref-type="bibr" rid="ref35">35</xref>]. Broadly, LLMs were able to propose differentials but were not reliable for independent use [<xref ref-type="bibr" rid="ref38">38</xref>]; specialized vision models such as KARA-CXR currently outperform ChatGPT in chest x-ray interpretation [<xref ref-type="bibr" rid="ref36">36</xref>]. GPT-4, even with single-shot prompts, identified incidental findings with high precision and recall from CT scans. In contrast, multimodal LLMs remain inadequate for pediatric image interpretation [<xref ref-type="bibr" rid="ref37">37</xref>].</p></sec><sec id="s3-4-2-4"><title>Extracting Information From Reports</title><p>Domain-tuned transformers consistently excelled. 
BERT variants, especially RadBERT, surpassed other text report classifiers with less annotation in extracting findings from intensive care chest radiograph reports [<xref ref-type="bibr" rid="ref4">4</xref>], and SpERT achieved high anatomy-linked extraction [<xref ref-type="bibr" rid="ref44">44</xref>]. Large clinical models also performed strongly: GatorTron reached high accuracy for cancer disease response [<xref ref-type="bibr" rid="ref40">40</xref>], and an information extraction pipeline inferred metastatic sites accurately and explainably [<xref ref-type="bibr" rid="ref41">41</xref>]. The open-source Vicuna showed excellent accuracy on emergency brain MRI reports without additional training [<xref ref-type="bibr" rid="ref43">43</xref>].</p></sec></sec></sec><sec id="s3-5"><title>Theme 2: LLMs for Report Generation and Quality</title><p>In total, 22.4% (15/67) of the studies examined LLMs for generating, structuring, or evaluating radiology reports, falling into 2 streams.</p><sec id="s3-5-1"><title>Text to Text</title><p>These systems converted free text into structured outputs or summaries: T5 and SciFive performed relation extraction to produce clinician-interpretable structured reports [<xref ref-type="bibr" rid="ref54">54</xref>], fine-tuned T5 yielded near-expert MRI conclusions in Spanish [<xref ref-type="bibr" rid="ref52">52</xref>], and Llama 2-70B locally structured reports with approximate human accuracy but variable semantics across languages and findings [<xref ref-type="bibr" rid="ref59">59</xref>]. GPT-4 improved standardization and generated reports with higher clarity and conciseness than human-written reports but lower diagnostic precision [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. It produced the most reliable report templates compared with Perplexity, GPT-3.5, and Bing [<xref ref-type="bibr" rid="ref53">53</xref>]. PEGASUS generated clinically acceptable personalized positron emission tomography (PET) impressions [<xref ref-type="bibr" rid="ref62">62</xref>].</p></sec><sec id="s3-5-2"><title>Image to Text</title><p>These pipelines enhanced captioning and paired reports. CXR-IRGen outperformed baselines for chest x-ray image-report pairs [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref57">57</xref>], and a Bloomz-7B1 2-step model (image&#x2192;abnormality&#x2192;report) showed promise and could help reduce workload [<xref ref-type="bibr" rid="ref49">49</xref>]. GPT-4 consistently emerged as the most robust model across multiple benchmarks [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref58">58</xref>], offering both high readability and coherence, although challenges in diagnostic precision and handling rare findings remain. All 4 studies in this theme showed that LLMs matched or exceeded baseline performance metrics such as BLEU, ROUGE, and Consensus-Based Image Description Evaluation for radiology report generation [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]. 
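</p><p>As a worked illustration of how these automated overlap metrics are computed, the following is a sketch only, assuming the nltk and rouge-score Python packages and using invented example sentences rather than data from any included study:</p><preformat># pip install nltk rouge-score  (assumed tooling; included studies varied)
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer

reference = "No focal consolidation. Mild cardiomegaly without pulmonary edema."
generated = "Mild cardiomegaly. No consolidation or pulmonary edema."

# BLEU: smoothed n-gram precision of the generated report against the reference
bleu = sentence_bleu([reference.split()], generated.split(),
                     smoothing_function=SmoothingFunction().method1)

# ROUGE-L: F-measure over the longest common subsequence
rouge_l = rouge_scorer.RougeScorer(["rougeL"]).score(
    reference, generated)["rougeL"].fmeasure

print(f"BLEU: {bleu:.2f} | ROUGE-L: {rouge_l:.2f}")</preformat><p>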
Integration of domain-adaptive training or prompt tuning improved model performance, underscoring the importance of radiology-specific fine-tuning.</p></sec></sec><sec id="s3-6"><title>Theme 3: Clinical Workflow Optimization</title><p>A total of 17.9% (12/67) of the studies evaluated how LLMs can optimize various nondiagnostic tasks in clinical workflows. This theme included 6 subthemes.</p><sec id="s3-6-1"><title>Summarization and Simplification</title><p>LLMs supported patient&#x2010;facing and clinician&#x2010;to&#x2010;clinician communication. GPT-3.5 reliably simplified radiology reports into plain language while preserving salient clinical details [<xref ref-type="bibr" rid="ref75">75</xref>]. Text-to-text transformers (eg, RadSpaT5 and T5) achieved expert-level abstractive summaries, producing accurate report conclusions in most cases [<xref ref-type="bibr" rid="ref68">68</xref>].</p></sec><sec id="s3-6-2"><title>Classification of Reports</title><p>Fine-tuned BERT models accurately categorized brain MRI reports into treatment-related groups and identified lung cancer pretreatment cases with performance comparable to that of human experts [<xref ref-type="bibr" rid="ref69">69</xref>,<xref ref-type="bibr" rid="ref71">71</xref>]. Domain-adapted variants (BioClinicalBERT and RadBERT) further improved PET and CT report classification, highlighting the value of specialty-specific pretraining [<xref ref-type="bibr" rid="ref70">70</xref>].</p></sec><sec id="s3-6-3"><title>Error Detection and Recommendation Extraction</title><p>LLMs showed high precision in identifying diagnostic inaccuracies and extracting actionable recommendations. The Augmented Transformer Assisted Radiology Intelligence model, which integrates both vision and language processing, significantly outperformed traditional natural language processing approaches in detecting laterality errors within reports [<xref ref-type="bibr" rid="ref74">74</xref>]. A BERT-based model identified reports containing recommendations for additional imaging with high precision and recall, enabling automated recommendation extraction [<xref ref-type="bibr" rid="ref73">73</xref>].</p></sec><sec id="s3-6-4"><title>Radiology Protocol Selection and Answering Patient Queries</title><p>GPT-4 accurately selected imaging modalities and protocols from referral forms, indicating potential to streamline protocoling tasks [<xref ref-type="bibr" rid="ref65">65</xref>]. It also answered common patient questions regarding PET and CT preparation and reporting, serving as a supplementary education tool [<xref ref-type="bibr" rid="ref76">76</xref>].</p></sec><sec id="s3-6-5"><title>User Interface Enhancement</title><p>User interface enhancement was explored through models such as ChatUI-RIS, which improved the usability of RISs by offering a more intuitive interface and enhanced learning experiences, particularly for trainees and junior radiologists [<xref ref-type="bibr" rid="ref72">72</xref>].</p></sec><sec id="s3-6-6"><title>Image Quality Assessment and Educational Use</title><p>Multimodal LLMs with visual understanding (eg, IQAGPT) provided effective CT image quality assessment [<xref ref-type="bibr" rid="ref66">66</xref>]. 
For education, GPT-4 generated high-quality board-style multiple-choice questions (ie, questions at the level of those on a board examination) and rationales for radiology curricula [<xref ref-type="bibr" rid="ref67">67</xref>].</p></sec></sec><sec id="s3-7"><title>Model Performance Across Applications</title><p>Performance varied widely across tasks (<xref ref-type="table" rid="table3">Table 3</xref>; the full metrics can be found in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref> and <xref ref-type="supplementary-material" rid="app6">6</xref>). Models fine-tuned on domain-specific corpora (eg, RadBERT, BioClinicalBERT, and Japanese BERT variants) consistently outperformed general-purpose LLMs in structured classification and report-based tasks, often achieving accuracies of &#x003E;95% [<xref ref-type="bibr" rid="ref69">69</xref>,<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref73">73</xref>].</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Summary of performance ranges across the included studies. The lowest and highest reported values are shown where available. Data were extracted from <xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref><xref ref-type="supplementary-material" rid="app3"/>-<xref ref-type="supplementary-material" rid="app4">4</xref> (N=67).</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Task or application domain and metric</td><td align="left" valign="bottom">Reported range</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Classification</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy (%)</td><td align="left" valign="top">83&#x2010;97</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.66&#x2010;1.00</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AUC<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">0.84&#x2010;0.99</td></tr><tr><td align="left" valign="top" colspan="2">Diagnostic reasoning from clinical cases</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy (%)</td><td align="left" valign="top">16&#x2010;50</td></tr><tr><td align="left" valign="top" colspan="2">Diagnosis from images</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy (%)</td><td align="left" valign="top">25&#x2010;84</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Match rate (%)</td><td align="left" valign="top">48-62</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Concordance (%)</td><td align="left" valign="top">66.7-68.8</td></tr><tr><td align="left" valign="top" colspan="2">Information extraction from radiology reports</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy (%)</td><td align="left" valign="top">83-97</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.66&#x2010;1.00</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AUC</td><td align="left" valign="top">0.84-0.99</td></tr><tr><td align="left" valign="top" colspan="2">Report generation and summarization</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.29-0.88</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy (%)</td><td align="left" valign="top">67-89</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clinical acceptability (physician rated; %)</td><td align="left" valign="top">89</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>BLEU<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> or ROUGE<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> scores</td><td align="left" valign="top">Variable, generally modest (BLEU: 0.46&#x2010;0.74; ROUGE-L<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup>: 0.37&#x2010;0.61)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Similarity score (%)</td><td align="left" valign="top">98.9-99.3</td></tr><tr><td align="left" valign="top" colspan="2">Quality assessment</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy (%)</td><td align="left" valign="top">70.2-98.3</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn><fn id="table3fn2"><p><sup>b</sup>BLEU: bilingual evaluation understudy.</p></fn><fn id="table3fn3"><p><sup>c</sup>ROUGE: recall-oriented understudy for gisting evaluation.</p></fn><fn id="table3fn4"><p><sup>d</sup>ROUGE-L: recall-oriented understudy for gisting evaluation based on the longest common subsequence.</p></fn></table-wrap-foot></table-wrap><p>In contrast, performance for diagnostic reasoning and image-based tasks remained modest. For instance, GPT-4V achieved only 27% to 35% accuracy in primary and differential diagnoses [<xref ref-type="bibr" rid="ref31">31</xref>], and GPT-4 variants reached &#x003C;25% accuracy in case-based diagnostic challenges [<xref ref-type="bibr" rid="ref23">23</xref>].</p><p>Text-based applications such as error detection [<xref ref-type="bibr" rid="ref74">74</xref>] and structured report inference [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref73">73</xref>] approached human-level accuracy (&#x2265;95%). 
Image-focused tasks yielded lower values, with rank-1 accuracy as low as 25% [<xref ref-type="bibr" rid="ref32">32</xref>], area under the curve values between 0.80 and 0.83 [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], and <italic>F</italic><sub>1</sub>-scores below 0.30 in some generative settings [<xref ref-type="bibr" rid="ref57">57</xref>].</p><p>Report generation and simplification tasks demonstrated variable performance depending on evaluation metrics. While BLEU and ROUGE scores remained modest, physician-rated acceptability and utility scores were encouraging [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref77">77</xref>], suggesting that automated metrics may underestimate clinical usability. GPT-4 also showed superior performance in exam question generation [<xref ref-type="bibr" rid="ref67">67</xref>] and summarization [<xref ref-type="bibr" rid="ref75">75</xref>].</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><sec id="s4-1-1"><title>Overview</title><p>This scoping review provides the first comprehensive synthesis of LLM applications across all domains of radiology. By mapping 67 studies, we identified 3 main areas of application: clinical decision support, report generation, and workflow optimization. There is evidence suggesting that LLMs are most reliable in structured tasks such as classification, information extraction, and educational support, whereas diagnostic reasoning and visual interpretation remain underdeveloped.</p></sec><sec id="s4-1-2"><title>Decision Support</title><p>GPT-based and BERT models showed strong performance in structured classification tasks such as the Thyroid Imaging Reporting and Data System, the Liver Imaging Reporting and Data System [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref21">21</xref>], fracture coding [<xref ref-type="bibr" rid="ref42">42</xref>], and tumor node metastasis staging [<xref ref-type="bibr" rid="ref20">20</xref>], particularly when domain-specific BERT variants were fine-tuned on radiology data. These models frequently matched or exceeded human performance in multilingual and specialty-specific contexts. In contrast, diagnostic reasoning tasks involving clinical cases or direct image interpretation showed limited and inconsistent performance. General-purpose GPT-4 and GPT-4V models achieved variable accuracy across case-based and imaging tasks, underscoring the immaturity of current multimodal reasoning [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p></sec><sec id="s4-1-3"><title>Report Generation</title><p>Transformer models such as T5, PEGASUS, and GPT-4 generated radiology reports that were linguistically coherent and frequently rated as clinically acceptable. Physician-rated outcomes often aligned GPT-4 reports with radiologist-written impressions. However, hallucinations and factual inaccuracies persist, particularly in rare or ambiguous cases. Automated linguistic metrics (BLEU and ROUGE) did not always correlate with clinical usability, highlighting the importance of human-centered evaluation. 
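</p><p>To illustrate why n-gram overlap metrics can understate the quality of a faithful report, consider the following minimal sketch (the impression pair is invented, and the NLTK and rouge-score packages are assumed): two semantically equivalent impressions with little lexical overlap receive a near-zero BLEU score and only a modest ROUGE-L score.</p><preformat># Minimal sketch: BLEU and ROUGE-L for two impressions that a reader
# would likely judge interchangeable. Both texts are invented examples;
# the nltk and rouge-score packages are assumed to be installed.
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer

reference = "no acute cardiopulmonary abnormality"
candidate = "no evidence of acute disease in the chest"

bleu = sentence_bleu(
    [reference.split()], candidate.split(),
    smoothing_function=SmoothingFunction().method1,
)
rouge_l = rouge_scorer.RougeScorer(["rougeL"]).score(reference, candidate)["rougeL"]

print(f"BLEU: {bleu:.2f}")                    # near zero despite equivalent meaning
print(f"ROUGE-L F1: {rouge_l.fmeasure:.2f}")  # modest (about 0.33 here)
</preformat><p>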
Without factuality scoring and domain-specific safeguards, unsupervised deployment of report generation tools remains premature.</p></sec><sec id="s4-1-4"><title>Workflow Optimization</title><p>While our thematic synthesis identified distinct application domains, we acknowledge that the &#x201C;workflow optimization&#x201D; category is intentionally broad. It encompasses a range of nondiagnostic use cases, including patient education, radiology report simplification, imaging protocol selection, and user interface enhancement. This thematic grouping reflects the expanding role of LLMs in supporting communication, training, and clinical efficiency beyond core diagnostic tasks. Although its breadth may resemble a &#x201C;catch-all,&#x201D; we believe that it accurately represents the dynamic and evolving integration of LLMs into radiological practice. Notably, the most reliable use cases for near-term clinical integration were concentrated in workflow support tasks. These included report simplification, protocol selection [<xref ref-type="bibr" rid="ref73">73</xref>], error identification [<xref ref-type="bibr" rid="ref74">74</xref>], and RIS user interface enhancement [<xref ref-type="bibr" rid="ref72">72</xref>]. Such tasks rely primarily on structured reasoning and language fluency rather than on complex diagnostic inference, making them especially suitable for early implementation. Specialized tools such as Augmented Transformer Assisted Radiology Intelligence (for error detection) [<xref ref-type="bibr" rid="ref74">74</xref>] and ChatUI-RIS (for user interface enhancement) [<xref ref-type="bibr" rid="ref72">72</xref>] outperformed general-purpose LLMs, reinforcing the value of domain adaptation. Educational uses such as generating board-style multiple-choice questions also proved effective, with high user satisfaction and accuracy [<xref ref-type="bibr" rid="ref67">67</xref>]. Taken together, these low-risk, high-utility functions offer a promising entry point for safe and meaningful adoption of LLMs in radiology.</p></sec><sec id="s4-1-5"><title>Emerging Trends</title><p>Two developments were particularly noteworthy. First, multimodal LLMs integrating text and image inputs are moving toward context-aware systems but continue to show high variability in performance and lack prospective validation. Second, domain-specific models such as Radiology-Llama2 and RadSpaT5 demonstrate stronger alignment with radiological terminology but remain underrepresented. Broader external validation and adoption of these models could improve interpretability and clinical fidelity.</p></sec></sec><sec id="s4-2"><title>Methodological Limitations of the Evidence</title><p>Several methodological gaps were consistently observed across the literature. Most studies relied on retrospective, single-center datasets, frequently limited to chest radiographs or neuroradiology, restricting generalizability. Sample sizes were often small, and only 22% of the studies (15/67) reported external validation. Publication bias is likely as studies with positive results may be preferentially published. Heterogeneous reporting of metrics further complicates benchmarking, and the absence of standardized evaluation frameworks for radiology-specific tasks prevents direct comparison across studies.</p></sec><sec id="s4-3"><title>Equity and Global Applicability</title><p>The predominance of English-language publications and Western data sources poses a significant barrier to equitable implementation. 
Without multilingual evaluation datasets and cross-regional external validation, performance estimates risk being skewed toward English-language and high-resource settings. Ensuring equity and inclusivity in model development and validation is essential for global relevance.</p></sec><sec id="s4-4"><title>Recommendations and Future Work</title><p>Future research should prioritize the following areas:</p><list list-type="order"><list-item><p>Data and validation: assemble diverse, multicenter, and multilingual datasets to improve generalizability. Conduct prospective evaluations across clinical environments.</p></list-item><list-item><p>Evaluation standards: develop radiology-specific factuality and safety benchmarks and ensure standardized reporting of performance and bias assessments.</p></list-item><list-item><p>Human factors: implement human-in-the-loop frameworks for oversight, error mitigation, and usability evaluation.</p></list-item><list-item><p>Governance: establish clear regulatory guidance and accountability standards to ensure transparency and safety in clinical use.</p></list-item></list></sec><sec id="s4-5"><title>Limitations</title><p>This scoping review has several limitations that should be acknowledged to aid interpretation and guide future research.</p><p>First, the search strategy, while designed to be comprehensive, was limited to 3 databases: PubMed, Scopus, and IEEE Xplore. These were selected to capture clinical, biomedical, and technical literature; however, this may have excluded relevant studies indexed in other databases (eg, Embase or Web of Science) or reported in gray literature sources such as arXiv and medRxiv or key conference proceedings (eg, NeurIPS and Medical Image Computing and Computer-Assisted Intervention). This limitation may have led to the omission of emerging or unpublished work.</p><p>Second, although efforts were made to use both free-text and controlled vocabulary (eg, MeSH terms in PubMed), the evolving and inconsistent terminology used to describe LLMs may have affected search sensitivity. Terms such as &#x201C;GPT,&#x201D; &#x201C;LLM,&#x201D; or &#x201C;transformer-based AI&#x201D; may not have been uniformly used across all relevant publications. While the search was iteratively refined and detailed strategies are included in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> to improve reproducibility, some studies may have been inadvertently missed due to terminology mismatch.</p><p>Third, only English-language articles were included. This decision was made to ensure consistency in interpretation and quality appraisal; however, it introduces language bias and may have excluded valuable contributions from non&#x2013;English-speaking regions, particularly in a globally active research field such as AI.</p><p>Fourth, consistent with the framework by Arksey and O&#x2019;Malley [<xref ref-type="bibr" rid="ref10">10</xref>], we did not include a formal quality assessment of the included studies. While this approach is appropriate for scoping reviews, future systematic reviews could integrate AI-specific appraisal tools (eg, the Minimum Information About Clinical Artificial Intelligence Modeling checklist and Checklist for Artificial Intelligence in Medical Imaging) to enhance interpretability. 
Importantly, the performance ranges reported across the studies (<xref ref-type="table" rid="table3">Table 3</xref>) should be approached with caution due to the heterogeneity of study designs, evaluation metrics, datasets, and model versions. Many included studies had proof-of-concept or single-institution designs with limited generalizability. Without standardized benchmarks or head-to-head comparisons, the reported values are best interpreted as illustrative of the field&#x2019;s current status rather than definitive benchmarks.</p><p>Publication bias is a potential concern, particularly given the rapid growth and high visibility of LLM research. Studies with positive or novel findings may be more likely to be published and indexed, whereas negative or inconclusive results may be underrepresented. Although publication bias was not formally assessed, this limitation should be considered when interpreting the results.</p><p>Fifth, while thematic synthesis is useful for structuring a heterogeneous literature, it is inherently interpretive. We mitigated bias by having 2 reviewers code independently and resolve discrepancies through consensus; however, subjective judgment may still have influenced the final thematic map. In addition, studies that addressed multiple tasks were assigned to a single primary category to avoid duplication. Certain subthemes&#x2014;such as classification&#x2014;appear under 2 overarching themes (decision support and workflow optimization). This placement reflects differences in the primary intent (eg, classifying reports and images to support diagnosis vs to streamline workflow), as detailed in the Results section. Finally, while the initial thematic analysis was conducted manually by human researchers, GPT-4 was later used as a supportive tool to assist in clustering and cross-verifying patterns. Given that GPT-4 is a generative and nondeterministic model, the reproducibility of its suggested outputs cannot be fully guaranteed. Therefore, this hybrid approach may introduce potential bias and variability, which should be considered when interpreting the thematic synthesis.</p></sec><sec id="s4-6"><title>Conclusions</title><p>The integration of LLMs into radiology is accelerating but remains uneven across application domains. Structured tasks such as classification and information extraction are approaching maturity, whereas diagnostic reasoning and multimodal interpretation require substantial improvement. Safe clinical deployment will depend not only on technical performance but also on rigorous validation, global inclusivity, and ethical governance.</p></sec></sec></body><back><ack><p>The authors would like to thank the librarian at Sultan Qaboos University for assistance in refining the search strategy and supporting the review process. The authors used ChatGPT (OpenAI; accessed July 2025) to assist with language refinement and proofreading. 
All scientific interpretations were conducted by the authors.</p></ack><notes><sec><title>Funding</title><p>This research received no specific grant from any funding agency in the public, commercial, or not-for-profit sectors.</p></sec><sec><title>Data Availability</title><p>All data generated or analyzed during this study are included in this published article and its supplementary information files.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: AAZ</p><p>Data curation: AAZ, SL</p><p>Formal analysis: AAZ, SL</p><p>Methodology: AAZ, IR, RS, AAA</p><p>Project administration: AAZ</p><p>Supervision: AAZ</p><p>Visualization: IR</p><p>Writing&#x2014;original draft: AAZ, SL</p><p>Writing&#x2014;review and editing: AAZ, SL</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">BERT</term><def><p>bidirectional encoder representations from transformers</p></def></def-item><def-item><term id="abb3">BLEU</term><def><p>bilingual evaluation understudy</p></def></def-item><def-item><term id="abb4">CT</term><def><p>computed tomography</p></def></def-item><def-item><term id="abb5">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb6">MeSH</term><def><p>Medical Subject Headings</p></def></def-item><def-item><term id="abb7">MRI</term><def><p>magnetic resonance imaging</p></def></def-item><def-item><term id="abb8">PET</term><def><p>positron emission tomography</p></def></def-item><def-item><term id="abb9">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb10">PRISMA-ScR</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews</p></def></def-item><def-item><term id="abb11">RIS</term><def><p>radiology information system</p></def></def-item><def-item><term id="abb12">ROUGE</term><def><p>recall-oriented understudy for gisting evaluation</p></def></def-item><def-item><term id="abb13">T5</term><def><p>Text-to-Text Transfer Transformer</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kotkar</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Mahadik</surname><given-names>RS</given-names> </name><name name-style="western"><surname>More</surname><given-names>PG</given-names> </name><name name-style="western"><surname>Thorat</surname><given-names>SA</given-names> </name></person-group><article-title>Comparative analysis of transformer-based large language models (LLMs) for text summarization</article-title><conf-name>2024 1st International Conference on Advanced Computing and Emerging Technologies (ACET)</conf-name><conf-date>Aug 23-24, 2024</conf-date><pub-id pub-id-type="doi">10.1109/ACET61898.2024.10730348</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bluethgen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Van Veen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zakka</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Best practices for 
large language models in radiology</article-title><source>Radiology</source><year>2025</year><month>04</month><volume>315</volume><issue>1</issue><fpage>e240528</fpage><pub-id pub-id-type="doi">10.1148/radiol.240528</pub-id><pub-id pub-id-type="medline">40298602</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zaki</surname><given-names>HA</given-names> </name><name name-style="western"><surname>Aoun</surname><given-names>A</given-names> </name><name name-style="western"><surname>Munshi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Abdel-Megid</surname><given-names>H</given-names> </name><name name-style="western"><surname>Nazario-Johnson</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ahn</surname><given-names>SH</given-names> </name></person-group><article-title>The application of large language models for radiologic decision making</article-title><source>J Am Coll Radiol</source><year>2024</year><month>07</month><volume>21</volume><issue>7</issue><fpage>1072</fpage><lpage>1078</lpage><pub-id pub-id-type="doi">10.1016/j.jacr.2024.01.007</pub-id><pub-id pub-id-type="medline">38224925</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bressem</surname><given-names>KK</given-names> </name><name name-style="western"><surname>Adams</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Gaudin</surname><given-names>RA</given-names> </name><etal/></person-group><article-title>Highly accurate classification of chest radiographic reports using a deep learning natural language model pre-trained on 3.8 million text reports</article-title><source>Bioinformatics</source><year>2021</year><month>01</month><day>29</day><volume>36</volume><issue>21</issue><fpage>5255</fpage><lpage>5261</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btaa668</pub-id><pub-id pub-id-type="medline">32702106</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sterling</surname><given-names>NW</given-names> </name><name name-style="western"><surname>Brann</surname><given-names>F</given-names> </name><name name-style="western"><surname>Frisch</surname><given-names>SO</given-names> </name><name name-style="western"><surname>Schrager</surname><given-names>JD</given-names> </name></person-group><article-title>Patient-readable radiology report summaries generated via large language model: safety and quality</article-title><source>J Patient Exp</source><year>2024</year><volume>11</volume><pub-id pub-id-type="doi">10.1177/23743735241259477</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reichenpfader</surname><given-names>D</given-names> </name><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>H</given-names> </name><name name-style="western"><surname>Denecke</surname><given-names>K</given-names> </name></person-group><article-title>A scoping review of large language model based approaches for information extraction from radiology reports</article-title><source>NPJ Digit 
Med</source><year>2024</year><month>08</month><day>24</day><volume>7</volume><issue>1</issue><fpage>222</fpage><pub-id pub-id-type="doi">10.1038/s41746-024-01219-0</pub-id><pub-id pub-id-type="medline">39182008</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reichenpfader</surname><given-names>D</given-names> </name><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>H</given-names> </name><name name-style="western"><surname>Denecke</surname><given-names>K</given-names> </name></person-group><article-title>Large language model-based information extraction from free-text radiology reports: a scoping review protocol</article-title><source>BMJ Open</source><year>2023</year><month>12</month><day>9</day><volume>13</volume><issue>12</issue><fpage>e076865</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2023-076865</pub-id><pub-id pub-id-type="medline">38070902</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Busch</surname><given-names>F</given-names> </name><name name-style="western"><surname>Hoffmann</surname><given-names>L</given-names> </name><name name-style="western"><surname>Dos Santos</surname><given-names>DP</given-names> </name><etal/></person-group><article-title>Large language models for structured reporting in radiology: past, present, and future</article-title><source>Eur Radiol</source><year>2025</year><month>05</month><volume>35</volume><issue>5</issue><fpage>2589</fpage><lpage>2602</lpage><pub-id pub-id-type="doi">10.1007/s00330-024-11107-6</pub-id><pub-id pub-id-type="medline">39438330</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nakaura</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ito</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ueda</surname><given-names>D</given-names> </name><etal/></person-group><article-title>The impact of large language models on radiology: a guide for radiologists on the latest innovations in AI</article-title><source>Jpn J Radiol</source><year>2024</year><month>07</month><volume>42</volume><issue>7</issue><fpage>685</fpage><lpage>696</lpage><pub-id pub-id-type="doi">10.1007/s11604-024-01552-0</pub-id><pub-id pub-id-type="medline">38551772</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arksey</surname><given-names>H</given-names> </name><name name-style="western"><surname>O&#x2019;Malley</surname><given-names>L</given-names> </name></person-group><article-title>Scoping studies: towards a methodological framework</article-title><source>Int J Soc Res Methodol</source><year>2005</year><volume>8</volume><issue>1</issue><fpage>19</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1080/1364557032000119616</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="web"><article-title>Faster systematic literature reviews</article-title><source>Rayyan</source><access-date>2025-11-10</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.rayyan.ai/">https://www.rayyan.ai/</ext-link></comment></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Blankemeier</surname><given-names>L</given-names> </name><name name-style="western"><surname>Cohen</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>A</given-names> </name></person-group><article-title>Merlin: a vision language foundation model for 3D computed tomography</article-title><source>Res Sq</source><comment>Preprint posted online on  Jun 28, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2406.06512</pub-id><pub-id pub-id-type="medline">38978576</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chambon</surname><given-names>P</given-names> </name><name name-style="western"><surname>Cook</surname><given-names>TS</given-names> </name><name name-style="western"><surname>Langlotz</surname><given-names>CP</given-names> </name></person-group><article-title>Improved fine-tuning of in-domain transformer model for inferring COVID-19 presence in multi-institutional radiology reports</article-title><source>J Digit Imaging</source><year>2023</year><month>02</month><volume>36</volume><issue>1</issue><fpage>164</fpage><lpage>177</lpage><pub-id pub-id-type="doi">10.1007/s10278-022-00714-8</pub-id><pub-id pub-id-type="medline">36323915</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fervers</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hahnfeldt</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kottlors</surname><given-names>J</given-names> </name><etal/></person-group><article-title>ChatGPT yields low accuracy in determining LI-RADS scores based on free-text and structured radiology reports in German language</article-title><source>Front Radiol</source><year>2024</year><month>07</month><day>5</day><volume>4</volume><fpage>1390774</fpage><pub-id pub-id-type="doi">10.3389/fradi.2024.1390774</pub-id><pub-id pub-id-type="medline">39036542</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haver</surname><given-names>HL</given-names> </name><name name-style="western"><surname>Bahl</surname><given-names>M</given-names> </name><name name-style="western"><surname>Doo</surname><given-names>FX</given-names> </name><etal/></person-group><article-title>Evaluation of multimodal ChatGPT (GPT-4V) in describing mammography image features</article-title><source>Can Assoc Radiol J</source><year>2024</year><month>11</month><volume>75</volume><issue>4</issue><fpage>947</fpage><lpage>949</lpage><pub-id pub-id-type="doi">10.1177/08465371241247043</pub-id><pub-id pub-id-type="medline">38581353</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Olivato</surname><given-names>M</given-names> </name><name name-style="western"><surname>Putelli</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Arici</surname><given-names>N</given-names> </name><name name-style="western"><surname>Emilio Gerevini</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lavelli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Serina</surname><given-names>I</given-names> </name></person-group><article-title>Language models for hierarchical classification of radiology reports with attention mechanisms, BERT, and GPT-4</article-title><source>IEEE Access</source><year>2024</year><volume>12</volume><fpage>69710</fpage><lpage>69727</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2024.3402066</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Putelli</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gerevini</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Lavelli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mehmood</surname><given-names>T</given-names> </name><name name-style="western"><surname>Serina</surname><given-names>I</given-names> </name></person-group><article-title>On the behaviour of BERT&#x2019;s attention for the classification of medical reports</article-title><access-date>2025-11-11</access-date><conf-name>Italian Workshop on Explainable Artificial Intelligence 2022</conf-name><conf-date>Nov 28 to Dec 3, 2022</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://ceur-ws.org/Vol-3277/paper2.pdf">https://ceur-ws.org/Vol-3277/paper2.pdf</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Santos</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kallas</surname><given-names>ON</given-names> </name><name name-style="western"><surname>Newsome</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rubin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Gichoya</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Banerjee</surname><given-names>I</given-names> </name></person-group><article-title>A fusion NLP model for the inference of standardized thyroid nodule malignancy scores from radiology report text</article-title><source>AMIA Annu Symp Proc</source><year>2022</year><month>02</month><day>21</day><volume>2021</volume><fpage>1079</fpage><lpage>1088</lpage><pub-id pub-id-type="medline">35308953</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Sehanobish</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kannan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Abraham</surname><given-names>N</given-names> </name><name name-style="western"><surname>Das</surname><given-names>A</given-names> </name><name name-style="western"><surname>Odry</surname><given-names>B</given-names> </name></person-group><article-title>Meta-learning pathologies from radiology reports using variance aware prototypical networks</article-title><conf-name>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>Dec 
7-11, 2022</conf-date><pub-id pub-id-type="doi">10.18653/v1/2022.emnlp-industry.34</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Suzuki</surname><given-names>K</given-names> </name><name name-style="western"><surname>Yamada</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yamazaki</surname><given-names>H</given-names> </name><name name-style="western"><surname>Honda</surname><given-names>G</given-names> </name><name name-style="western"><surname>Sakai</surname><given-names>S</given-names> </name></person-group><article-title>Preliminary assessment of TNM classification performance for pancreatic cancer in Japanese radiology reports using GPT-4</article-title><source>Jpn J Radiol</source><year>2025</year><month>01</month><volume>43</volume><issue>1</issue><fpage>51</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.1007/s11604-024-01643-y</pub-id><pub-id pub-id-type="medline">39162781</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Evaluating large language models for automated reporting and data systems categorization: cross-sectional study</article-title><source>JMIR Med Inform</source><year>2024</year><month>07</month><day>17</day><volume>12</volume><fpage>e55799</fpage><pub-id pub-id-type="doi">10.2196/55799</pub-id><pub-id pub-id-type="medline">39018102</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Development and multicenter validation of chest X-ray radiography interpretations based on natural language processing</article-title><source>Commun Med (Lond)</source><year>2021</year><month>10</month><day>28</day><volume>1</volume><fpage>43</fpage><pub-id pub-id-type="doi">10.1038/s43856-021-00043-x</pub-id><pub-id pub-id-type="medline">35602222</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Horiuchi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tatekawa</surname><given-names>H</given-names> </name><name name-style="western"><surname>Shimono</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Accuracy of ChatGPT generated diagnosis from patient&#x2019;s medical history and imaging findings in neuroradiology cases</article-title><source>Neuroradiology</source><year>2024</year><month>01</month><volume>66</volume><issue>1</issue><fpage>73</fpage><lpage>79</lpage><pub-id pub-id-type="doi">10.1007/s00234-023-03252-4</pub-id><pub-id pub-id-type="medline">37994939</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Horiuchi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tatekawa</surname><given-names>H</given-names> </name><name name-style="western"><surname>Oura</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Comparing the diagnostic performance of GPT-4-based ChatGPT, GPT-4V-based ChatGPT, and radiologists in challenging neuroradiology cases</article-title><source>Clin Neuroradiol</source><year>2024</year><month>12</month><volume>34</volume><issue>4</issue><fpage>779</fpage><lpage>787</lpage><pub-id pub-id-type="doi">10.1007/s00062-024-01426-y</pub-id><pub-id pub-id-type="medline">38806794</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Horiuchi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tatekawa</surname><given-names>H</given-names> </name><name name-style="western"><surname>Oura</surname><given-names>T</given-names> </name><etal/></person-group><article-title>ChatGPT&#x2019;s diagnostic performance based on textual vs. visual information compared to radiologists&#x2019; diagnostic performance in musculoskeletal radiology</article-title><source>Eur Radiol</source><year>2025</year><month>01</month><volume>35</volume><issue>1</issue><fpage>506</fpage><lpage>516</lpage><pub-id pub-id-type="doi">10.1007/s00330-024-10902-5</pub-id><pub-id pub-id-type="medline">38995378</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kurokawa</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ohizumi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kanzawa</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Diagnostic performances of Claude 3 Opus and Claude 3.5 Sonnet from patient history and key images in Radiology&#x2019;s &#x201C;Diagnosis Please&#x201D; cases</article-title><source>Jpn J Radiol</source><year>2024</year><month>12</month><volume>42</volume><issue>12</issue><fpage>1399</fpage><lpage>1402</lpage><pub-id pub-id-type="doi">10.1007/s11604-024-01634-z</pub-id><pub-id pub-id-type="medline">39096483</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wada</surname><given-names>A</given-names> </name><name name-style="western"><surname>Akashi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Shih</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Optimizing GPT-4 turbo diagnostic accuracy in neuroradiology through prompt engineering and confidence thresholds</article-title><source>Diagnostics (Basel)</source><year>2024</year><month>07</month><day>17</day><volume>14</volume><issue>14</issue><fpage>1541</fpage><pub-id pub-id-type="doi">10.3390/diagnostics14141541</pub-id><pub-id pub-id-type="medline">39061677</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elek</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Ekizalio&#x011F;lu</surname><given-names>DD</given-names> </name><name name-style="western"><surname>G&#x00FC;ler</surname><given-names>E</given-names> </name></person-group><article-title>Evaluating Microsoft Bing with ChatGPT-4 for the assessment of abdominal computed tomography and magnetic resonance images</article-title><source>Diagn Interv Radiol</source><year>2025</year><month>04</month><day>28</day><volume>31</volume><issue>3</issue><fpage>196</fpage><lpage>205</lpage><pub-id pub-id-type="doi">10.4274/dir.2024.232680</pub-id><pub-id pub-id-type="medline">39155793</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Khare</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bagal</surname><given-names>V</given-names> </name><name name-style="western"><surname>Mathew</surname><given-names>M</given-names> </name><name name-style="western"><surname>Devi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Priyakumar</surname><given-names>UD</given-names> </name><name name-style="western"><surname>Jawahar</surname><given-names>CV</given-names> </name></person-group><article-title>MMBERT: multimodal BERT pretraining for improved medical VQA</article-title><conf-name>2021 IEEE 18th International Symposium on Biomedical Imaging (ISBI)</conf-name><conf-date>Apr 13-16, 2021</conf-date><pub-id pub-id-type="doi">10.1109/ISBI48211.2021.9434063</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Pachade</surname><given-names>S</given-names> </name><name name-style="western"><surname>Datta</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dong</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Self-supervised learning with radiology reports, a comparative analysis of strategies for large vessel occlusion and brain CTA images</article-title><conf-name>2023 IEEE 20th International Symposium on Biomedical Imaging (ISBI)</conf-name><conf-date>Apr 18-21, 2023</conf-date><pub-id pub-id-type="doi">10.1109/ISBI53787.2023.10230623</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Busch</surname><given-names>F</given-names> </name><name name-style="western"><surname>Han</surname><given-names>T</given-names> </name><name name-style="western"><surname>Makowski</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Truhn</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bressem</surname><given-names>KK</given-names> </name><name name-style="western"><surname>Adams</surname><given-names>L</given-names> </name></person-group><article-title>Integrating text and image analysis: exploring GPT-4V&#x2019;s capabilities in advanced radiological applications across subspecialties</article-title><source>J Med Internet Res</source><year>2024</year><month>05</month><day>1</day><volume>26</volume><fpage>e54948</fpage><pub-id pub-id-type="doi">10.2196/54948</pub-id><pub-id pub-id-type="medline">38691404</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Silva</surname><given-names>TP</given-names> </name><name name-style="western"><surname>Andrade-Bortoletto</surname><given-names>MFS</given-names> </name><name name-style="western"><surname>Ocampo</surname><given-names>TSC</given-names> </name><etal/></person-group><article-title>Performance of a commercially available generative pre-trained transformer (GPT) in describing radiolucent lesions in panoramic radiographs and establishing differential diagnoses</article-title><source>Clin Oral Investig</source><year>2024</year><month>03</month><day>9</day><volume>28</volume><issue>3</issue><fpage>204</fpage><pub-id pub-id-type="doi">10.1007/s00784-024-05587-5</pub-id><pub-id pub-id-type="medline">38459362</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Tong</surname><given-names>WJ</given-names> </name><name name-style="western"><surname>Li</surname><given-names>MD</given-names> </name><etal/></person-group><article-title>Collaborative enhancement of consistency and accuracy in US diagnosis of thyroid nodules using large language models</article-title><source>Radiology</source><year>2024</year><month>03</month><volume>310</volume><issue>3</issue><fpage>e232255</fpage><pub-id pub-id-type="doi">10.1148/radiol.232255</pub-id><pub-id pub-id-type="medline">38470237</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kottlors</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bratke</surname><given-names>G</given-names> </name><name name-style="western"><surname>Rauen</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Feasibility of differential diagnosis based on imaging patterns using a large language model</article-title><source>Radiology</source><year>2023</year><month>07</month><volume>308</volume><issue>1</issue><fpage>e231167</fpage><pub-id pub-id-type="doi">10.1148/radiol.231167</pub-id><pub-id pub-id-type="medline">37404149</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Overgaard Olesen</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Miger</surname><given-names>KC</given-names> </name><name name-style="western"><surname>Nielsen</surname><given-names>OW</given-names> </name><name name-style="western"><surname>Grand</surname><given-names>J</given-names> </name></person-group><article-title>How does ChatGPT-4 match radiologists in detecting pulmonary congestion on chest X-ray?</article-title><source>J Med Artif Intell</source><year>2024</year><volume>7</volume><fpage>18</fpage><pub-id pub-id-type="doi">10.21037/jmai-24-26</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>RW</given-names> </name><name name-style="western"><surname>Kwon</surname><given-names>YE</given-names> </name></person-group><article-title>Validation of a deep learning chest X-ray interpretation model: 
integrating large-scale AI and large language models for comparative analysis with ChatGPT</article-title><source>Diagnostics</source><year>2023</year><month>12</month><day>30</day><volume>14</volume><issue>1</issue><fpage>90</fpage><pub-id pub-id-type="doi">10.3390/diagnostics14010090</pub-id><pub-id pub-id-type="medline">38201398</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reith</surname><given-names>TP</given-names> </name><name name-style="western"><surname>D&#x2019;Alessandro</surname><given-names>DM</given-names> </name><name name-style="western"><surname>D&#x2019;Alessandro</surname><given-names>MP</given-names> </name></person-group><article-title>Capability of multimodal large language models to interpret pediatric radiological images</article-title><source>Pediatr Radiol</source><year>2024</year><month>09</month><volume>54</volume><issue>10</issue><fpage>1729</fpage><lpage>1737</lpage><pub-id pub-id-type="doi">10.1007/s00247-024-06025-0</pub-id><pub-id pub-id-type="medline">39133401</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sarangi</surname><given-names>PK</given-names> </name><name name-style="western"><surname>Irodi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Panda</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nayak</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Mondal</surname><given-names>H</given-names> </name></person-group><article-title>Radiological differential diagnoses based on cardiovascular and thoracic imaging patterns: perspectives of four large language models</article-title><source>Indian J Radiol Imaging</source><year>2023</year><month>12</month><day>28</day><volume>34</volume><issue>2</issue><fpage>269</fpage><lpage>275</lpage><pub-id pub-id-type="doi">10.1055/s-0043-1777289</pub-id><pub-id pub-id-type="medline">38549881</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mukherjee</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hou</surname><given-names>B</given-names> </name><name name-style="western"><surname>Lanfredi</surname><given-names>RB</given-names> </name><name name-style="western"><surname>Summers</surname><given-names>RM</given-names> </name></person-group><article-title>Feasibility of using the privacy-preserving large language model vicuna for labeling radiology reports</article-title><source>Radiology</source><year>2023</year><month>10</month><volume>309</volume><issue>1</issue><fpage>e231147</fpage><pub-id pub-id-type="doi">10.1148/radiol.231147</pub-id><pub-id pub-id-type="medline">37815442</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Low</surname><given-names>GH</given-names> </name><etal/></person-group><article-title>Inferring cancer disease response from radiology reports using large language models with data 
augmentation and prompting</article-title><source>J Am Med Inform Assoc</source><year>2023</year><month>09</month><day>25</day><volume>30</volume><issue>10</issue><fpage>1657</fpage><lpage>1664</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocad133</pub-id><pub-id pub-id-type="medline">37451682</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tay</surname><given-names>SB</given-names> </name><name name-style="western"><surname>Low</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>GJ</given-names> </name><etal/></person-group><article-title>Use of natural language processing to infer sites of metastatic disease from radiology reports at scale</article-title><source>JCO Clin Cancer Inform</source><year>2024</year><month>05</month><volume>8</volume><fpage>e2300122</fpage><pub-id pub-id-type="doi">10.1200/CCI.23.00122</pub-id><pub-id pub-id-type="medline">38788166</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Russe</surname><given-names>MF</given-names> </name><name name-style="western"><surname>Fink</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ngo</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Performance of ChatGPT, human radiologists, and context-aware ChatGPT in identifying AO codes from radiology reports</article-title><source>Sci Rep</source><year>2023</year><month>08</month><day>30</day><volume>13</volume><issue>1</issue><fpage>14215</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-41512-8</pub-id><pub-id pub-id-type="medline">37648742</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Le Guellec</surname><given-names>B</given-names> </name><name name-style="western"><surname>Lef&#x00E8;vre</surname><given-names>A</given-names> </name><name name-style="western"><surname>Geay</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Performance of an open-source large language model in extracting information from free-text radiology reports</article-title><source>Radiol Artif Intell</source><year>2024</year><month>07</month><volume>6</volume><issue>4</issue><fpage>e230364</fpage><pub-id pub-id-type="doi">10.1148/ryai.230364</pub-id><pub-id pub-id-type="medline">38717292</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lybarger</surname><given-names>K</given-names> </name><name name-style="western"><surname>Damani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gunn</surname><given-names>M</given-names> </name><name name-style="western"><surname>Uzuner</surname><given-names>OZ</given-names> </name><name name-style="western"><surname>Yetisgen</surname><given-names>M</given-names> </name></person-group><article-title>Extracting radiological findings with normalized anatomical information using a span-based BERT relation extraction model</article-title><source>AMIA Jt Summits Transl Sci 
Proc</source><year>2022</year><month>05</month><day>23</day><volume>2022</volume><fpage>339</fpage><lpage>348</lpage><pub-id pub-id-type="medline">35854739</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dada</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ufer</surname><given-names>TL</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Information extraction from weakly structured radiological reports with natural language queries</article-title><source>Eur Radiol</source><year>2024</year><month>01</month><volume>34</volume><issue>1</issue><fpage>330</fpage><lpage>337</lpage><pub-id pub-id-type="doi">10.1007/s00330-023-09977-3</pub-id><pub-id pub-id-type="medline">37505252</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>D</given-names> </name><name name-style="western"><surname>Hadjiiski</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gormley</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Outcome prediction using multi-modal information: integrating large language model-extracted clinical information and image analysis</article-title><source>Cancers (Basel)</source><year>2024</year><month>06</month><day>29</day><volume>16</volume><issue>13</issue><fpage>2402</fpage><pub-id pub-id-type="doi">10.3390/cancers16132402</pub-id><pub-id pub-id-type="medline">39001463</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bhayana</surname><given-names>R</given-names> </name><name name-style="western"><surname>Elias</surname><given-names>G</given-names> </name><name name-style="western"><surname>Datta</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bhambra</surname><given-names>N</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Krishna</surname><given-names>S</given-names> </name></person-group><article-title>Use of GPT-4 with single-shot learning to identify incidental findings in radiology reports</article-title><source>AJR Am J Roentgenol</source><year>2024</year><month>03</month><volume>222</volume><issue>3</issue><fpage>e2330651</fpage><pub-id pub-id-type="doi">10.2214/AJR.23.30651</pub-id><pub-id pub-id-type="medline">38197759</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Bibault</surname><given-names>JE</given-names> </name></person-group><article-title>Pilot applications of GPT-4 in radiation oncology: summarizing patient symptom intake and targeted chatbot applications</article-title><source>Radiother Oncol</source><year>2024</year><month>01</month><volume>190</volume><fpage>109978</fpage><pub-id pub-id-type="doi">10.1016/j.radonc.2023.109978</pub-id><pub-id pub-id-type="medline">37913954</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Danu</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Marica</surname><given-names>G</given-names> </name><name name-style="western"><surname>Karn</surname><given-names>SK</given-names> </name><etal/></person-group><article-title>Generation of radiology findings in chest X-ray by leveraging collaborative knowledge</article-title><source>Procedia Comput Sci</source><year>2023</year><volume>221</volume><fpage>1102</fpage><lpage>1109</lpage><pub-id pub-id-type="doi">10.1016/j.procs.2023.08.094</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hasani</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zahergivar</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Evaluating the performance of Generative Pre-trained Transformer-4 (GPT-4) in standardizing radiology reports</article-title><source>Eur Radiol</source><year>2024</year><month>06</month><volume>34</volume><issue>6</issue><fpage>3566</fpage><lpage>3574</lpage><pub-id pub-id-type="doi">10.1007/s00330-023-10384-x</pub-id><pub-id pub-id-type="medline">37938381</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ji</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hou</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Pan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xiang</surname><given-names>Y</given-names> </name></person-group><article-title>Vision-language model for generating textual descriptions from clinical images: model development and validation study</article-title><source>JMIR Form Res</source><year>2024</year><month>02</month><day>8</day><volume>8</volume><fpage>e32690</fpage><pub-id pub-id-type="doi">10.2196/32690</pub-id><pub-id pub-id-type="medline">38329788</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>L&#x00F3;pez-&#x00DA;beda</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mart&#x00ED;n-Noguerol</surname><given-names>T</given-names> </name><name name-style="western"><surname>Escart&#x00ED;n</surname><given-names>J</given-names> </name><name name-style="western"><surname>Luna</surname><given-names>A</given-names> </name></person-group><article-title>Automatic generation of conclusions from neuroradiology MRI reports through natural language processing</article-title><source>Neuroradiology</source><year>2024</year><month>04</month><volume>66</volume><issue>4</issue><fpage>477</fpage><lpage>485</lpage><pub-id pub-id-type="doi">10.1007/s00234-024-03312-3</pub-id><pub-id pub-id-type="medline">38381144</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mallio</surname><given-names>CA</given-names> </name><name 
name-style="western"><surname>Sertorio</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Bernetti</surname><given-names>C</given-names> </name><name name-style="western"><surname>Beomonte Zobel</surname><given-names>B</given-names> </name></person-group><article-title>Large language models for structured reporting in radiology: performance of GPT-4, ChatGPT-3.5, Perplexity and Bing</article-title><source>Radiol Med</source><year>2023</year><month>07</month><volume>128</volume><issue>7</issue><fpage>808</fpage><lpage>812</lpage><pub-id pub-id-type="doi">10.1007/s11547-023-01651-4</pub-id><pub-id pub-id-type="medline">37248403</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moezzi</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Ghaedi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rahmanian</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mousavi</surname><given-names>SZ</given-names> </name><name name-style="western"><surname>Sami</surname><given-names>A</given-names> </name></person-group><article-title>Application of deep learning in generating structured radiology reports: a transformer-based technique</article-title><source>J Digit Imaging</source><year>2023</year><month>02</month><volume>36</volume><issue>1</issue><fpage>80</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1007/s10278-022-00692-x</pub-id><pub-id pub-id-type="medline">36002778</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nakaura</surname><given-names>T</given-names> </name><name name-style="western"><surname>Yoshida</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kobayashi</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Preliminary assessment of automated radiology report generation with generative pre-trained transformers: comparing results to radiologist-generated reports</article-title><source>Jpn J Radiol</source><year>2024</year><month>02</month><volume>42</volume><issue>2</issue><fpage>190</fpage><lpage>200</lpage><pub-id pub-id-type="doi">10.1007/s11604-023-01487-y</pub-id><pub-id pub-id-type="medline">37713022</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Selivanov</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rogov</surname><given-names>OY</given-names> </name><name name-style="western"><surname>Chesakov</surname><given-names>D</given-names> </name><name name-style="western"><surname>Shelmanov</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fedulova</surname><given-names>I</given-names> </name><name name-style="western"><surname>Dylov</surname><given-names>DV</given-names> </name></person-group><article-title>Medical image captioning via generative pretrained transformers</article-title><source>Sci Rep</source><year>2023</year><month>03</month><day>13</day><volume>13</volume><issue>1</issue><fpage>4171</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-31223-5</pub-id><pub-id pub-id-type="medline">36914733</pub-id></nlm-citation></ref><ref 
id="ref57"><label>57</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Shentu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Al Moubayed</surname><given-names>N</given-names> </name></person-group><article-title>CXR-IRGen: an integrated vision and language model for the generation of clinically accurate chest X-ray image-report pairs</article-title><conf-name>2024 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)</conf-name><conf-date>Jan 3-8, 2024</conf-date><pub-id pub-id-type="doi">10.1109/WACV57701.2024.00513</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Soleimani</surname><given-names>M</given-names> </name><name name-style="western"><surname>Seyyedi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ayyoubzadeh</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Kalhori</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Keshavarz</surname><given-names>H</given-names> </name></person-group><article-title>Practical evaluation of ChatGPT performance for radiology report generation</article-title><source>Acad Radiol</source><year>2024</year><month>12</month><volume>31</volume><issue>12</issue><fpage>4823</fpage><lpage>4832</lpage><pub-id pub-id-type="doi">10.1016/j.acra.2024.07.020</pub-id><pub-id pub-id-type="medline">39142976</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wo&#x017A;nicki</surname><given-names>P</given-names> </name><name name-style="western"><surname>Laqua</surname><given-names>C</given-names> </name><name name-style="western"><surname>Fiku</surname><given-names>I</given-names> </name><etal/></person-group><article-title>Automatic structuring of radiology reports with on-premise open-source large language models</article-title><source>Eur Radiol</source><year>2025</year><month>04</month><volume>35</volume><issue>4</issue><fpage>2018</fpage><lpage>2029</lpage><pub-id pub-id-type="doi">10.1007/s00330-024-11074-y</pub-id><pub-id pub-id-type="medline">39390261</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Li</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ni</surname><given-names>M</given-names> </name><name name-style="western"><surname>Yuan</surname><given-names>H</given-names> </name></person-group><article-title>Learning to generate radiology findings from impressions based on large language model</article-title><conf-name>2023 IEEE International Conference on Big Data (BigData)</conf-name><conf-date>Dec 15-18, 2023</conf-date><pub-id pub-id-type="doi">10.1109/BigData59044.2023.10386916</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bhayana</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Nanda</surname><given-names>B</given-names> </name><name name-style="western"><surname>Dehkharghanian</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Large language models for automated synoptic reports and resectability categorization in pancreatic cancer</article-title><source>Radiology</source><year>2024</year><month>06</month><volume>311</volume><issue>3</issue><fpage>e233117</fpage><pub-id pub-id-type="doi">10.1148/radiol.233117</pub-id><pub-id pub-id-type="medline">38888478</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tie</surname><given-names>X</given-names> </name><name name-style="western"><surname>Shin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pirasteh</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Personalized impression generation for PET reports using large language models</article-title><source>J Imaging Inform Med</source><year>2024</year><month>04</month><volume>37</volume><issue>2</issue><fpage>471</fpage><lpage>488</lpage><pub-id pub-id-type="doi">10.1007/s10278-024-00985-3</pub-id><pub-id pub-id-type="medline">38308070</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Karn</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Ghosh</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kusuma</surname><given-names>P</given-names> </name><name name-style="western"><surname>Farri</surname><given-names>O</given-names> </name></person-group><article-title>shs-nlp at RadSum23: domain-adaptive pre-training of instruction-tuned LLMs for radiology report impression generation</article-title><conf-name>22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</conf-name><conf-date>Jul 13, 2023</conf-date><pub-id pub-id-type="doi">10.18653/v1/2023.bionlp-1.57</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Q</given-names> </name></person-group><article-title>Leveraging professional radiologists&#x2019; expertise to enhance LLMs&#x2019; evaluation for radiology reports</article-title><source>arXiv</source><access-date>2025-11-12</access-date><comment>Preprint posted online on  Jan 29, 2024</comment><comment><ext-link ext-link-type="uri" xlink:href="https://pmc.ncbi.nlm.nih.gov/articles/PMC11188146/">https://pmc.ncbi.nlm.nih.gov/articles/PMC11188146/</ext-link></comment><pub-id pub-id-type="doi">10.48550/arXiv.2401.16578</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gertz</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Bunck</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Lennartz</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 for automated determination of 
radiological study and protocol based on radiology request forms: a feasibility study</article-title><source>Radiology</source><year>2023</year><month>06</month><volume>307</volume><issue>5</issue><fpage>e230877</fpage><pub-id pub-id-type="doi">10.1148/radiol.230877</pub-id><pub-id pub-id-type="medline">37310247</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>B</given-names> </name><name name-style="western"><surname>Niu</surname><given-names>C</given-names> </name><etal/></person-group><article-title>IQAGPT: computed tomography image quality assessment with vision-language and ChatGPT models</article-title><source>Vis Comput Ind Biomed Art</source><year>2024</year><month>08</month><day>5</day><volume>7</volume><issue>1</issue><fpage>20</fpage><pub-id pub-id-type="doi">10.1186/s42492-024-00171-w</pub-id><pub-id pub-id-type="medline">39101954</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mistry</surname><given-names>NP</given-names> </name><name name-style="western"><surname>Saeed</surname><given-names>H</given-names> </name><name name-style="western"><surname>Rafique</surname><given-names>S</given-names> </name><name name-style="western"><surname>Le</surname><given-names>T</given-names> </name><name name-style="western"><surname>Obaid</surname><given-names>H</given-names> </name><name name-style="western"><surname>Adams</surname><given-names>SJ</given-names> </name></person-group><article-title>Large language models as tools to generate radiology board-style multiple-choice questions</article-title><source>Acad Radiol</source><year>2024</year><month>09</month><volume>31</volume><issue>9</issue><fpage>3872</fpage><lpage>3878</lpage><pub-id pub-id-type="doi">10.1016/j.acra.2024.06.046</pub-id><pub-id pub-id-type="medline">39013736</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nishio</surname><given-names>M</given-names> </name><name name-style="western"><surname>Matsunaga</surname><given-names>T</given-names> </name><name name-style="western"><surname>Matsuo</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Fully automatic summarization of radiology reports using natural language processing with large language models</article-title><source>Inform Med Unlocked</source><year>2024</year><volume>46</volume><fpage>101465</fpage><pub-id pub-id-type="doi">10.1016/j.imu.2024.101465</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yasaka</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kanzawa</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kanemaru</surname><given-names>N</given-names> </name><name name-style="western"><surname>Koshino</surname><given-names>S</given-names> </name><name name-style="western"><surname>Abe</surname><given-names>O</given-names> </name></person-group><article-title>Fine-tuned large language model for extracting patients on pretreatment for lung cancer from a 
picture archiving and communication system based on radiological reports</article-title><source>J Imaging Inform Med</source><year>2025</year><month>02</month><volume>38</volume><issue>1</issue><fpage>327</fpage><lpage>334</lpage><pub-id pub-id-type="doi">10.1007/s10278-024-01186-8</pub-id><pub-id pub-id-type="medline">38955964</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huemann</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>SY</given-names> </name><name name-style="western"><surname>Bradshaw</surname><given-names>TJ</given-names> </name></person-group><article-title>Domain-adapted large language models for classifying nuclear medicine reports</article-title><source>Radiol Artif Intell</source><year>2023</year><month>09</month><day>27</day><volume>5</volume><issue>6</issue><fpage>e220281</fpage><pub-id pub-id-type="doi">10.1148/ryai.220281</pub-id><pub-id pub-id-type="medline">38074793</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kanzawa</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yasaka</surname><given-names>K</given-names> </name><name name-style="western"><surname>Fujita</surname><given-names>N</given-names> </name><name name-style="western"><surname>Fujiwara</surname><given-names>S</given-names> </name><name name-style="western"><surname>Abe</surname><given-names>O</given-names> </name></person-group><article-title>Automated classification of brain MRI reports using fine-tuned large language models</article-title><source>Neuroradiology</source><year>2024</year><month>12</month><volume>66</volume><issue>12</issue><fpage>2177</fpage><lpage>2183</lpage><pub-id pub-id-type="doi">10.1007/s00234-024-03427-7</pub-id><pub-id pub-id-type="medline">38995393</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Shu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Exploring the potential of large language models in radiological imaging systems: improving user interface design and functional capabilities</article-title><source>Electronics</source><year>2024</year><volume>13</volume><issue>11</issue><fpage>2002</fpage><pub-id pub-id-type="doi">10.3390/electronics13112002</pub-id></nlm-citation></ref><ref id="ref73"><label>73</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abbasi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Lacson</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kapoor</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Development and external validation of an artificial intelligence model for identifying radiology reports containing recommendations for additional 
imaging</article-title><source>AJR Am J Roentgenol</source><year>2023</year><month>09</month><volume>221</volume><issue>3</issue><fpage>377</fpage><lpage>385</lpage><pub-id pub-id-type="doi">10.2214/AJR.23.29120</pub-id><pub-id pub-id-type="medline">37466185</pub-id></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kathait</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Garza-Frias</surname><given-names>E</given-names> </name><name name-style="western"><surname>Sikka</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Assessing laterality errors in radiology: comparing generative artificial intelligence and natural language processing</article-title><source>J Am Coll Radiol</source><year>2024</year><month>10</month><volume>21</volume><issue>10</issue><fpage>1575</fpage><lpage>1582</lpage><pub-id pub-id-type="doi">10.1016/j.jacr.2024.06.014</pub-id><pub-id pub-id-type="medline">38960083</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sarangi</surname><given-names>PK</given-names> </name><name name-style="western"><surname>Lumbani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Swarup</surname><given-names>MS</given-names> </name><etal/></person-group><article-title>Assessing ChatGPT&#x2019;s proficiency in simplifying radiological reports for healthcare professionals and patients</article-title><source>Cureus</source><year>2023</year><month>12</month><day>21</day><volume>15</volume><issue>12</issue><fpage>e50881</fpage><pub-id pub-id-type="doi">10.7759/cureus.50881</pub-id><pub-id pub-id-type="medline">38249202</pub-id></nlm-citation></ref><ref id="ref76"><label>76</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rogasch</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Metzger</surname><given-names>G</given-names> </name><name name-style="western"><surname>Preisler</surname><given-names>M</given-names> </name><etal/></person-group><article-title>ChatGPT: can you prepare my patients for [<sup>18</sup>F]FDG PET/CT and explain my reports?</article-title><source>J Nucl Med</source><year>2023</year><month>12</month><day>1</day><volume>64</volume><issue>12</issue><fpage>1876</fpage><lpage>1879</lpage><pub-id pub-id-type="doi">10.2967/jnumed.123.266114</pub-id><pub-id pub-id-type="medline">37709536</pub-id></nlm-citation></ref><ref id="ref77"><label>77</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Butler</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Acosta</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kuna</surname><given-names>MC</given-names> </name><etal/></person-group><article-title>Decoding radiology reports: artificial intelligence-large language models can improve the readability of hand and wrist orthopedic radiology reports</article-title><source>Hand (N Y)</source><year>2025</year><month>10</month><volume>20</volume><issue>7</issue><fpage>1144</fpage><lpage>1152</lpage><pub-id pub-id-type="doi">10.1177/15589447241267766</pub-id><pub-id
pub-id-type="medline">39138809</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Full search strategies.</p><media xlink:href="medinform_v13i1e78041_app1.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Summary of all the included articles.</p><media xlink:href="medinform_v13i1e78041_app2.xlsx" xlink:title="XLSX File, 43 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Data modalities used across the included studies (N=67): text-only (eg, radiology reports, cases, and request forms), image-only (eg, x-ray, computed tomography, and magnetic resonance imaging), multimodal (text + images), and system or metadata (eg, radiology information system) sources.</p><media xlink:href="medinform_v13i1e78041_app3.png" xlink:title="PNG File, 44 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Distribution of radiology studies by subspecialty (N=67). This chart illustrates the number of studies conducted in each radiology subspecialty. Thoracic imaging, general radiology, and oncologic imaging were the most frequently studied areas.</p><media xlink:href="medinform_v13i1e78041_app4.png" xlink:title="PNG File, 111 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Summary of the extracted themes from the included articles (N=67).</p><media xlink:href="medinform_v13i1e78041_app5.xlsx" xlink:title="XLSX File, 44 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Reported performance metrics of large language model (LLM) applications in radiology across the included studies (N=67).</p><media xlink:href="medinform_v13i1e78041_app6.docx" xlink:title="DOCX File, 211 KB"/></supplementary-material><supplementary-material id="app7"><label>Checklist 1</label><p>PRISMA-ScR checklist.</p><media xlink:href="medinform_v13i1e78041_app7.docx" xlink:title="DOCX File, 87 KB"/></supplementary-material></app-group></back></article>