<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v12i1e63010</article-id>
      <article-id pub-id-type="pmid">39357052</article-id>
      <article-id pub-id-type="doi">10.2196/63010</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Comparative Study to Evaluate the Accuracy of Differential Diagnosis Lists Generated by Gemini Advanced, Gemini, and Bard for a Case Report Series Analysis: Cross-Sectional Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Karampinis</surname>
            <given-names>Emmanouil</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ai</surname>
            <given-names>Xuguang</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Hirosawa</surname>
            <given-names>Takanobu</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Diagnostic and Generalist Medicine</institution>
            <institution>Dokkyo Medical University</institution>
            <addr-line>880 Kitakobayashi, Mibu-cho</addr-line>
            <addr-line>Shimotsuga, 321-0293</addr-line>
            <country>Japan</country>
            <phone>81 282861111</phone>
            <email>hirosawa@dokkyomed.ac.jp</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3573-8203</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Harada</surname>
            <given-names>Yukinori</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6042-7397</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Tokumasu</surname>
            <given-names>Kazuki</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9513-6864</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Ito</surname>
            <given-names>Takahiro</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6807-5936</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Suzuki</surname>
            <given-names>Tomoharu</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5557-0516</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Shimizu</surname>
            <given-names>Taro</given-names>
          </name>
          <degrees>MD, MSc, MPH, MBA, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3788-487X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Diagnostic and Generalist Medicine</institution>
        <institution>Dokkyo Medical University</institution>
        <addr-line>Shimotsuga</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of General Medicine</institution>
        <institution>Graduate School of Medicine, Dentistry and Pharmaceutical Sciences</institution>
        <institution>Okayama University</institution>
        <addr-line>Okayama</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Satsuki Home Clinic</institution>
        <addr-line>Tochigi</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Hospital Medicine</institution>
        <institution>Urasoe General Hospital</institution>
        <addr-line>Okinawa</addr-line>
        <country>Japan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Takanobu Hirosawa <email>hirosawa@dokkyomed.ac.jp</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>2</day>
        <month>10</month>
        <year>2024</year>
      </pub-date>
      <volume>12</volume>
      <elocation-id>e63010</elocation-id>
      <history>
        <date date-type="received">
          <day>7</day>
          <month>6</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>22</day>
          <month>7</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>7</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>6</day>
          <month>8</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Takanobu Hirosawa, Yukinori Harada, Kazuki Tokumasu, Takahiro Ito, Tomoharu Suzuki, Taro Shimizu. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 02.10.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2024/1/e63010" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Generative artificial intelligence (GAI) systems by Google have recently been updated from Bard to Gemini and Gemini Advanced as of December 2023. Gemini is a basic, free-to-use model after a user’s login, while Gemini Advanced operates on a more advanced model requiring a fee-based subscription. These systems have the potential to enhance medical diagnostics. However, the impact of these updates on comprehensive diagnostic accuracy remains unknown.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to compare the accuracy of the differential diagnosis lists generated by Gemini Advanced, Gemini, and Bard across comprehensive medical fields using case report series.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We identified a case report series with relevant final diagnoses published in the <italic>American Journal of Case Reports</italic> from January 2022 to March 2023. After excluding nondiagnostic cases and patients aged 10 years and younger, we included the remaining case reports. After refining the case parts as case descriptions, we input the same case descriptions into Gemini Advanced, Gemini, and Bard to generate the top 10 differential diagnosis lists. In total, 2 expert physicians independently evaluated whether the final diagnosis was included in the lists and its ranking. Any discrepancies were resolved by another expert physician. Bonferroni correction was applied to adjust the <italic>P</italic> values for the number of comparisons among 3 GAI systems, setting the corrected significance level at <italic>P</italic> value &#60;.02.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In total, 392 case reports were included. The inclusion rates of the final diagnosis within the top 10 differential diagnosis lists were 73% (286/392) for Gemini Advanced, 76.5% (300/392) for Gemini, and 68.6% (269/392) for Bard. The top diagnoses matched the final diagnoses in 31.6% (124/392) for Gemini Advanced, 42.6% (167/392) for Gemini, and 31.4% (123/392) for Bard. Gemini demonstrated higher diagnostic accuracy than Bard both within the top 10 differential diagnosis lists (<italic>P</italic>=.02) and as the top diagnosis (<italic>P</italic>=.001). In addition, Gemini Advanced achieved significantly lower accuracy than Gemini in identifying the most probable diagnosis (<italic>P</italic>=.002).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The results of this study suggest that Gemini outperformed Bard in diagnostic accuracy following the model update. However, Gemini Advanced requires further refinement to optimize its performance for future artificial intelligence–enhanced diagnostics. These findings should be interpreted cautiously and considered primarily for research purposes, as these GAI systems have neither been adjusted for medical diagnostics nor approved for clinical use.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>clinical decision support</kwd>
        <kwd>diagnostic excellence</kwd>
        <kwd>generative artificial intelligence</kwd>
        <kwd>large language models</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Diagnostic Team to Reduce Misdiagnoses</title>
        <p>Diagnosis is a crucial step in clinical medicine, where a significant proportion of medical errors and harms are related to diagnostic errors [<xref ref-type="bibr" rid="ref1">1</xref>]. The formation of a diagnostic team has been proposed as an effective strategy to mitigate the risks associated with misdiagnosis [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. This team should promote collaboration among medical professionals, patients, and their families, and the integration of digital tools to enhance diagnostic accuracy [<xref ref-type="bibr" rid="ref4">4</xref>]. Several studies, including systematic reviews, have shown that the implementation of clinical decision support systems (CDSSs) in clinical settings has significantly improved diagnostic accuracy, patient care, and health care processes [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      </sec>
      <sec>
        <title>Digital Tool for Medical Diagnosis</title>
        <p>Various digital tools, particularly diagnostic CDSSs, have emerged for medical diagnostics. These systems are designed to provide diagnostic suggestions based on clinical data, aiding medical professionals in clinical decision-making [<xref ref-type="bibr" rid="ref8">8</xref>]. Traditionally, diagnostic CDSSs, such as symptom checkers and differential diagnosis generators, have relied on fixed algorithms and rule-based systems derived from medical databases and expert input [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Unfortunately, these systems often experience poor accuracy and inadequate integration into clinical workflows, limiting their practical use in real-world medical settings [<xref ref-type="bibr" rid="ref4">4</xref>]. In this context, artificial intelligence (AI), especially generative AI (GAI), has introduced a new category of CDSS [<xref ref-type="bibr" rid="ref12">12</xref>]. This advancement suggests a future shift in how digital tools can support diagnostic processes.</p>
      </sec>
      <sec>
        <title>GAI in Medical Diagnosis</title>
        <p>GAI systems have shown rapid development and are increasingly influencing various fields, including medicine. This advancement is partly due to the development of machine learning techniques, such as neural networks and natural language processing. GAI represents a shift from rule-based systems to models that can autonomously generate and evaluate new data patterns. Overcoming many limitations faced by traditional CDSSs, GAI systems could significantly enhance future diagnostic processes. Notable examples include ChatGPT developed by OpenAI, and Gemini and Gemini Advanced from Google [<xref ref-type="bibr" rid="ref13">13</xref>]. These systems use advanced large language models (LLMs), which are complex neural networks trained on vast data sets through natural language processing [<xref ref-type="bibr" rid="ref14">14</xref>]. Recent studies, including one evaluating dermoscopy image descriptions with chatbot responses, have demonstrated promising results in accuracy and diagnostic completeness by ChatGPT [<xref ref-type="bibr" rid="ref15">15</xref>]. The language model for dialogue applications (LaMDA) developed by Google AI is one such LLM. The ability of these LLMs to process and generate outputs is particularly promising for future applications in medical diagnostics, where they will analyze complex clinical information and collaborate as part of a diagnostic team.</p>
      </sec>
      <sec>
        <title>From Bard to Gemini and Gemini Advanced</title>
        <p>Originally, Bard was developed using the LaMDA model primarily for text generation and conversational AI and later transitioned to the Pathways Language Model (PaLM 2). Subsequent developments led to the release of Gemini and Gemini Advanced in December 2023. Gemini Advanced, an upgraded version of Gemini, leverages Ultra 1.0, Google’s most advanced model, offered as a fee-based service [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. These developments reflect the rapid pace at which GAI technology is advancing. Recent updates have transformed Bard into Gemini and Gemini Advanced, enhancing their functionalities and applications in various fields. Previous research, including our own, has demonstrated that Bard showed promising results in medicine [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. Moreover, a recent study has shown that several GAI systems, including Gemini Advanced, could achieve notable diagnostic accuracy for multiple-choice questions about clinical vignettes [<xref ref-type="bibr" rid="ref22">22</xref>]. These findings suggest that even without specific training or reinforcement for diagnostics, GAI systems show potential for reliable use in diagnostics. Despite these advancements, the comparative diagnostic accuracy of differential diagnosis lists by these GAI systems across comprehensive medical fields remains to be fully explored. This study aims to fill that gap by evaluating the diagnostic accuracy of the differential diagnosis lists generated by Gemini Advanced, Gemini, and Bard for case report series across various medical disciplines.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>An experimental study was conducted to assess the diagnostic accuracy of Gemini Advanced, Gemini, and Bard for a comprehensive case report series. This study was conducted at the Department of Diagnostic and Generalist Medicine (General Internal Medicine) at Dokkyo Medical University, Japan. This study consisted of preparing case materials, generating differential diagnosis lists, evaluating the lists, and analyzing the diagnostic accuracy. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the study flow, including the inclusion of case materials and the generation of differential diagnosis lists.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study flow for the inclusion of case materials and the generation of differential diagnosis lists.</p>
          </caption>
          <graphic xlink:href="medinform_v12i1e63010_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Preparing Case Materials</title>
        <p>We focused on a comprehensive series of case reports from the <italic>American Journal of Case Reports</italic>, covering a broad range of medical fields. The structured format of the journal facilitated easy identification of sections containing the case reports and the final diagnoses. Initially, the inclusion criteria were the case reports published in the <italic>American Journal of Case Reports</italic> from January 1, 2022, to March 1, 2023. A PubMed search identified 557 consecutive case reports. After excluding 130 nondiagnostic case reports and 35 pediatric case reports (patients aged 10 years and younger), 392 case reports remained. The exclusion criteria were based on previous research for CDSS [<xref ref-type="bibr" rid="ref23">23</xref>]. We refined the case reports to prepare the case materials, which typically extended from the initial part of the case report to the definitive tests for the final diagnosis. The relevant final diagnoses were typically described by the authors. We used textual data exclusively, omitting image data. Specifically, the title, background, final diagnosis, clinical course following diagnosis, discussion, conclusion, figures, tables, and supplemental materials were excluded from the case materials. The main investigator (TH) conducted this process with validation from another investigator (YH). The PubMed search keywords are shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. For example, in a case report titled “Herpes Zoster Following COVID-19 Vaccine Booster,” the final diagnosis was herpes zoster [<xref ref-type="bibr" rid="ref24">24</xref>]. We extracted the case report part from “An 82-year-old...” to “Vesicular breath sounds were heard equally on both lung fields.”</p>
      </sec>
      <sec>
        <title>Generating Differential Diagnosis Lists</title>
        <p>We used Gemini Advanced, Gemini, and Bard as GAI systems for this research. This was because these systems are popular AI platforms available to the public. These GAI systems were not specifically enhanced for medical diagnosis. Details about the GAI systems used in this study are provided in <xref ref-type="table" rid="table1">Table 1</xref>. To generate the top 10 differential diagnosis lists from GAI systems, the main investigator typically copied and pasted the case materials into the AI systems with the following prompt: “Tell me the top 10 suspected illnesses for the following case: (case materials).” This prompt, developed through preliminary research, aimed to generate the top 10 differential diagnosis lists. The first list produced by the GAI systems was used as the differential diagnosis list. The data control setting was adjusted to “Not saving activity,” to avoid influence from previous conversations. In addition, before starting a new session, the main investigator refreshed the previous session to prevent any influence from previous conversations.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The details of generative artificial intelligence systems used in this study.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="330"/>
            <col width="320"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Gemini Advanced</td>
                <td>Gemini</td>
                <td>Bard</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>AI<sup>a</sup> model</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ultra 1.0</td>
                <td>Pro</td>
                <td>Pathways Language Model (PaLM 2)-based</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Availability</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Fee-based subscription</td>
                <td>Free with user login</td>
                <td>Discontinued</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>The setting of the app activity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not saving activity</td>
                <td>Not saving activity</td>
                <td>Not saving activity</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Access date</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>April 4-9, 2024</td>
                <td>March 12-28, 2024</td>
                <td>July 1, 2023-August 8, 2023</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Prompt</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>“Tell me the top 10 suspected illnesses for the following case: (case materials).”</td>
                <td>“Tell me the top 10 suspected illnesses for the following case: (case materials).”</td>
                <td>“Tell me the top 10 suspected illnesses for the following case: (case materials).”</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Evaluating the Differential Diagnosis Lists</title>
        <p>A total of 2 expert researchers (TI and T Suzuki) independently evaluated the differential diagnosis lists from GAI systems. A score of “1” was assigned if the differential accurately and specifically identified the final diagnosis or was sufficiently close to the final diagnosis. Conversely, a score of “0” was assigned if it diverged significantly from the final diagnosis [<xref ref-type="bibr" rid="ref25">25</xref>]. When a GAI system could not output the differential diagnosis list, a score of “0” was assigned. When the score was “1,” the evaluator assessed its ranking within the list. Any discrepancies were resolved by another expert researcher (KT). All evaluators were blinded to which GAI systems produced the differential diagnosis lists.</p>
      </sec>
      <sec>
        <title>Analyzing the Diagnostic Accuracy</title>
        <p>In this study, we defined diagnostic accuracy as the inclusion of the final diagnoses in the differential diagnosis lists.</p>
      </sec>
      <sec>
        <title>Outcome</title>
        <p>In terms of the outcomes, the primary outcome was the total score for correctly identifying the final diagnosis in the top 10 differential diagnosis lists generated by Gemini Advanced, Gemini, and Bard. The total number of included case reports was used as the denominator. The numerator was the number of case reports that correctly identified the final diagnosis in the top 10 differential diagnosis lists. The secondary outcomes were the total score for correctly identifying the final diagnosis in the top 5 differential diagnosis lists and as the top diagnosis generated from Gemini Advanced, Gemini, and Bard.</p>
        <p>In addition, we evaluated the top 10 rankings of the most frequently named differential diagnoses across generated differential diagnosis lists by a GAI system to find the underlying patterns. We also assessed whether the items in the differential diagnosis lists corresponded to the names of existing diseases.</p>
        <p>Moreover, we analyzed how Gemini Advanced, Gemini, and Bard rank the correct diagnosis on average when it appears in the differential diagnosis lists. This metric helps evaluate not only whether the correct diagnosis is included but also its relative priority among other suggested diagnoses. For cases where the correct diagnosis was missing, we assigned a penalty rank; specifically, we used 11 as the penalty rank.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>A chi-square test was used for the categorical or binary variables. The Mann-Whitney <italic>U</italic> test was applied to analyze the average rankings. For multiple comparisons, the Bonferroni correction was applied [<xref ref-type="bibr" rid="ref26">26</xref>]. The Bonferroni correction adjusts the significance level by dividing it by the number of comparisons being made, namely, those among the 3 GAI systems (Gemini Advanced, Gemini, and Bard), setting a more stringent threshold for individual tests to be considered significant. The Bonferroni-corrected significance level was defined as a <italic>P</italic> value of &#60;.02. Note that all statistical tests were performed using R (version 4.2.2; The R Foundation for Statistical Computing) for macOS X.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Given the study’s method of using published case reports, approval from an ethical committee was deemed not applicable.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview of Outputs From GAI Systems</title>
        <p>In total, 392 case reports were included. The distribution of diagnoses in these reports highlighted the prevalence of complex medical conditions, with 22.7% (89/392) of cases being diagnosed as malignancy, 22.4% (88/392) as various infections, and 17.1% (67/392) as vascular diseases.</p>
        <p>Bard generated differential diagnosis lists for all 392 cases. In contrast, both Gemini Advanced and Gemini encountered challenges with 3 cases, failing to produce outputs. The cases where outputs were not generated involved complex clinical scenarios that were directly described in the reports as involving overdose, psychological conditions, and vaccination side effects. Representative outputs from Gemini Advanced, Gemini, and Bard are shown in <xref ref-type="table" rid="table2">Table 2</xref>. The data set of differential diagnoses generated by AI systems in this study, alongside the final diagnosis, is shown in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Representative outputs from Gemini Advanced, Gemini, and Bard for final diagnoses.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="330"/>
            <col width="320"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Gemini Advanced</td>
                <td>Gemini</td>
                <td>Bard</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Herpes zoster</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Shingles (Herpes Zoster)</p>
                    </list-item>
                    <list-item>
                      <p>Myocardial ischemia</p>
                    </list-item>
                    <list-item>
                      <p>Costochondritis</p>
                    </list-item>
                    <list-item>
                      <p>Pericarditis</p>
                    </list-item>
                    <list-item>
                      <p>Pleuritis</p>
                    </list-item>
                    <list-item>
                      <p>Pulmonary embolism</p>
                    </list-item>
                    <list-item>
                      <p>Pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>Metastatic malignancy</p>
                    </list-item>
                    <list-item>
                      <p>Aortic dissection</p>
                    </list-item>
                    <list-item>
                      <p>Esophageal spasm</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Shingles (Herpes Zoster)</p>
                    </list-item>
                    <list-item>
                      <p>Myocardial ischemia (Angina)</p>
                    </list-item>
                    <list-item>
                      <p>Costochondritis</p>
                    </list-item>
                    <list-item>
                      <p>Musculoskeletal strain</p>
                    </list-item>
                    <list-item>
                      <p>Pleurisy</p>
                    </list-item>
                    <list-item>
                      <p>Intercostal neuritis</p>
                    </list-item>
                    <list-item>
                      <p>Pulmonary embolism</p>
                    </list-item>
                    <list-item>
                      <p>Pericarditis</p>
                    </list-item>
                    <list-item>
                      <p>Gastritis</p>
                    </list-item>
                    <list-item>
                      <p>Esophagitis</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Herpes zoster (Shingles)</p>
                    </list-item>
                    <list-item>
                      <p>Myocardial infarction (heart attack)</p>
                    </list-item>
                    <list-item>
                      <p>Aortic dissection</p>
                    </list-item>
                    <list-item>
                      <p>Pulmonary embolism</p>
                    </list-item>
                    <list-item>
                      <p>Pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>Thoracic radiculopathy</p>
                    </list-item>
                    <list-item>
                      <p>Costochondritis</p>
                    </list-item>
                    <list-item>
                      <p>Intercostal neuralgia</p>
                    </list-item>
                    <list-item>
                      <p>Costoclavicular syndrome</p>
                    </list-item>
                    <list-item>
                      <p>Tietze syndrome</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Diffuse alveolar hemorrhage</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Pulmonary alveolar hemorrhage</p>
                    </list-item>
                    <list-item>
                      <p>Nocardia pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>Pulmonary embolism</p>
                    </list-item>
                    <list-item>
                      <p>Acute eosinophilic pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>Drug-induced pneumonitis</p>
                    </list-item>
                    <list-item>
                      <p>Legionnaires' disease</p>
                    </list-item>
                    <list-item>
                      <p>Hypersensitivity pneumonitis</p>
                    </list-item>
                    <list-item>
                      <p>Idiopathic pulmonary fibrosis</p>
                    </list-item>
                    <list-item>
                      <p>Bronchiolitis obliterans organizing pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>Acute interstitial pneumonitis</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Pulmonary alveolar hemorrhage</p>
                    </list-item>
                    <list-item>
                      <p>Trastuzumab-induced pneumonitis</p>
                    </list-item>
                    <list-item>
                      <p>Aspirin-exacerbated respiratory disease</p>
                    </list-item>
                    <list-item>
                      <p>Acute heart failure</p>
                    </list-item>
                    <list-item>
                      <p>Pulmonary edema</p>
                    </list-item>
                    <list-item>
                      <p>Nocardia pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>Pulmonary hypertension</p>
                    </list-item>
                    <list-item>
                      <p>Acute interstitial lung disease</p>
                    </list-item>
                    <list-item>
                      <p>Acute chronic kidney disease</p>
                    </list-item>
                    <list-item>
                      <p>Occult gastrointestinal bleed</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Acute radiation pneumonitis</p>
                    </list-item>
                    <list-item>
                      <p>Acute hypersensitivity pneumonitis</p>
                    </list-item>
                    <list-item>
                      <p>Pulmonary edema</p>
                    </list-item>
                    <list-item>
                      <p>Pulmonary embolism</p>
                    </list-item>
                    <list-item>
                      <p>Aspiration pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>Bronchogenic carcinoma</p>
                    </list-item>
                    <list-item>
                      <p>Sarcoidosis</p>
                    </list-item>
                    <list-item>
                      <p>Lymphoma</p>
                    </list-item>
                    <list-item>
                      <p>Wegener’s granulomatosis</p>
                    </list-item>
                    <list-item>
                      <p>Goodpasture syndrome</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Glomus tumor</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Meralgia paresthetica</p>
                    </list-item>
                    <list-item>
                      <p>Soft tissue sarcoma</p>
                    </list-item>
                    <list-item>
                      <p>Fibroma</p>
                    </list-item>
                    <list-item>
                      <p>Myositis ossificans</p>
                    </list-item>
                    <list-item>
                      <p>Neuroma</p>
                    </list-item>
                    <list-item>
                      <p>Hemangioma</p>
                    </list-item>
                    <list-item>
                      <p>Liposarcoma</p>
                    </list-item>
                    <list-item>
                      <p>Lymphoma</p>
                    </list-item>
                    <list-item>
                      <p>Abscess</p>
                    </list-item>
                    <list-item>
                      <p>Malignant peripheral nerve sheath tumor</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Meralgia paresthetica</p>
                    </list-item>
                    <list-item>
                      <p>Lumbar radiculopathy</p>
                    </list-item>
                    <list-item>
                      <p>Femoral neuropathy</p>
                    </list-item>
                    <list-item>
                      <p>Piriformis syndrome</p>
                    </list-item>
                    <list-item>
                      <p>Muscle strain</p>
                    </list-item>
                    <list-item>
                      <p>Myofascial pain syndrome</p>
                    </list-item>
                    <list-item>
                      <p>Glomus tumor</p>
                    </list-item>
                    <list-item>
                      <p>Neuroma</p>
                    </list-item>
                    <list-item>
                      <p>Soft tissue sarcoma (unlikely)</p>
                    </list-item>
                    <list-item>
                      <p>Abscess (unlikely)</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Lipoma</p>
                    </list-item>
                    <list-item>
                      <p>Fibromatosis</p>
                    </list-item>
                    <list-item>
                      <p>Myositis</p>
                    </list-item>
                    <list-item>
                      <p>Neoplasm</p>
                    </list-item>
                    <list-item>
                      <p>Infection</p>
                    </list-item>
                    <list-item>
                      <p>Inflammation</p>
                    </list-item>
                    <list-item>
                      <p>Trauma</p>
                    </list-item>
                    <list-item>
                      <p>Hematoma</p>
                    </list-item>
                    <list-item>
                      <p>Varicose vein</p>
                    </list-item>
                    <list-item>
                      <p>Cellulitis</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Diagnostic Accuracy</title>
        <p>In terms of diagnostic accuracy, the inclusion rates of the final diagnoses within the top 10 differential diagnosis lists were 73.0% (286/392) for Gemini Advanced, 76.5% (300/392) for Gemini, and 68.6% (269/392) for Bard. For the top 5 differential diagnoses, the rates were 60.5% (237/392) for Gemini Advanced, 66.3% (260/392) for Gemini, and 59.9% (235/392) for Bard. The top diagnoses matched the final diagnoses in 31.6% (124/392) for Gemini Advanced, 42.6% (167/392) for Gemini, and 31.4% (123/392) for Bard. Gemini demonstrated higher diagnostic accuracy than Bard both within the top 10 differential diagnosis lists (<italic>P</italic>=.02) and as the top diagnosis (<italic>P</italic>=.001). In addition, Gemini Advanced achieved lower accuracy than Gemini in identifying the most probable diagnosis, with this result being statistically significant (<italic>P</italic>=.002). Other comparisons were not statistically significant. <xref ref-type="table" rid="table3">Table 3</xref> and <xref rid="figure2" ref-type="fig">Figure 2</xref> show the diagnostic accuracy by Gemini Advanced, Gemini, and Bard.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Diagnostic accuracy of Gemini Advanced, Gemini, and Bard.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="130"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Variable</td>
                <td>Gemini Advanced (N=392), n (%)</td>
                <td>Gemini (N=392), n (%)</td>
                <td>Bard (N=392), n (%)</td>
                <td colspan="3"><italic>P</italic> value<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Gemini Advanced versus Gemini</td>
                <td>Gemini Advanced versus Bard</td>
                <td>Gemini versus Bard</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Within the top 10</td>
                <td>286 (73.0)</td>
                <td>300 (76.5)</td>
                <td>269 (68.6)</td>
                <td>.29</td>
                <td>.21</td>
                <td>.02</td>
              </tr>
              <tr valign="top">
                <td>Within the top 5</td>
                <td>237 (60.5)</td>
                <td>260 (66.3)</td>
                <td>235 (59.9)</td>
                <td>.10</td>
                <td>.94</td>
                <td>.08</td>
              </tr>
              <tr valign="top">
                <td>Top diagnosis</td>
                <td>124 (31.6)</td>
                <td>167 (42.6)</td>
                <td>123 (31.4)</td>
                <td>.002</td>
                <td>.99</td>
                <td>.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Chi-square test. The Bonferroni-corrected significance level was set at a <italic>P</italic> value &#60;.02.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Diagnostic accuracy of Gemini Advanced, Gemini, and Bard. <italic>P</italic> values were derived from the chi-square test. The Bonferroni-corrected significance level was set at a <italic>P</italic> value &#60;.02.</p>
          </caption>
          <graphic xlink:href="medinform_v12i1e63010_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Most Frequently Named Differential Diagnoses</title>
        <p>Regarding the top 10 most frequently named differential diagnoses, all rankings included sepsis, pneumonia, pulmonary embolism, lymphoma, and meningitis. Notably, the top 3 most frequently named differential diagnoses by Gemini Advanced and Gemini were the same. <xref ref-type="table" rid="table4">Table 4</xref> shows the top 10 most frequently named differential diagnoses generated by Gemini Advanced, Gemini, and Bard.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The top 10 most frequently named differential diagnoses generated by Gemini Advanced, Gemini, and Bard.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>The ranking in the top 10 most frequently named differentials (N)</td>
                <td>Gemini Advanced (n)</td>
                <td>Gemini (n)</td>
                <td>Bard (n)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Sepsis (43)</td>
                <td>Sepsis (42)</td>
                <td>Sarcoidosis (51)</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Pneumonia (34)</td>
                <td>Pneumonia (28)</td>
                <td>Sepsis (42)</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Pulmonary embolism (33)</td>
                <td>Pulmonary embolism (20)</td>
                <td>Pneumonia (41)</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Acute kidney injury (28)</td>
                <td>Sarcoidosis (15)</td>
                <td>Lymphoma (40)</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Lymphoma (25)</td>
                <td>Pericarditis (14)</td>
                <td>Pulmonary embolism (39)</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Urinary tract infection (24)</td>
                <td>Meningitis (14)</td>
                <td>Meningitis (31)</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Heart failure (23)</td>
                <td>Lymphoma (13)</td>
                <td>Inflammatory bowel disease (29)</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Meningitis (22)</td>
                <td>Myocarditis (13)</td>
                <td>Tuberculosis (26)</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Myocardial infarction (20)</td>
                <td>Acute kidney injury (12)</td>
                <td>Encephalitis (25)</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Pericarditis (18)</td>
                <td>Systemic lupus erythematosus (12)</td>
                <td>Myocarditis (24)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Inappropriate Disease Names</title>
        <p>From all differential diagnosis lists output by generative AIs, we identified inappropriate disease names: 11 items from Gemini Advanced, 9 items from Gemini, and 5 items from Bard. Notably, Gemini Advanced and Gemini both erroneously listed “Wegner’s granulomatosis,” a misspelling of the previously correct term, “Wegener’s granulomatosis,” which has now been updated to “Granulomatosis with Polyangiitis” [<xref ref-type="bibr" rid="ref27">27</xref>]. Another error by Gemini Advanced involved “Microcytic colitis,” likely a confusion between “microcytic anemia” and “microscopic colitis.” <xref ref-type="table" rid="table5">Table 5</xref> lists the inappropriate disease names generated by Gemini Advanced, Gemini, and Bard.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Inappropriate disease names generated by Gemini Advanced, Gemini, and Bard.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="290"/>
            <col width="230"/>
            <col width="270"/>
            <col width="210"/>
            <thead>
              <tr valign="top">
                <td>Correct disease name, [cell number in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>]</td>
                <td>Inappropriate disease name by Gemini Advanced</td>
                <td>Inappropriate disease name by Gemini</td>
                <td>Inappropriate disease name by Bard</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug reaction (Dasatinib)</td>
                <td>Drug reation (Dasatinib) [O15]</td>
                <td>—<sup>a</sup></td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Nonketotic hyperglycemic hyperosmolar coma</td>
                <td>—</td>
                <td>Nonketotic hyperglycemia hyperosmolar coma [X29]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Lipedema</td>
                <td>—</td>
                <td>Lipoderma [U34]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Small bowel angiodysplasia</td>
                <td>Small bowel angiodisplasia [M40]</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Granulomatosis with polyangiitis</td>
                <td>Granulomatous with polyangiitis [L68], Wegner’s granulomatosis [N111]</td>
                <td>Wegner’s granulomatosis [U186]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Costochondritis</td>
                <td>Costochondritisa [P86]</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Maxillary sinus carcinoma</td>
                <td>—</td>
                <td>Maxillary sinus cycinoma [L106]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Constrictive pericarditis</td>
                <td>Conrictive pericarditis [L110]</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Scleroderma-related interstitial lung disease</td>
                <td>—</td>
                <td>Scleroderma-related interstitial lung disease [S117]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Pericoronitis</td>
                <td>—</td>
                <td>—</td>
                <td>Pericoronatitis [AA133]</td>
              </tr>
              <tr valign="top">
                <td>Osteitis</td>
                <td>—</td>
                <td>—</td>
                <td>Osteoitis [AC133]</td>
              </tr>
              <tr valign="top">
                <td>Microscopic colitis</td>
                <td>Microcytic colitis [L152]</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Pneumocystis jirovecii</td>
                <td>—</td>
                <td>—</td>
                <td>Pneumocystis jerovecii [AG156]</td>
              </tr>
              <tr valign="top">
                <td>Leukoencephalopathy</td>
                <td>—</td>
                <td>Leukoencephalomyopathy [X195]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Strumal carcinoid</td>
                <td>—</td>
                <td>Struma carcinoid [W208]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Restrictive ventilatory impairment</td>
                <td>Restricted ventilatory impairment [J360]</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Moebius syndrome</td>
                <td>—</td>
                <td>Mobius syndrome [Z369]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Endometriosis</td>
                <td>—</td>
                <td>—</td>
                <td>Endometrios [AE385]</td>
              </tr>
              <tr valign="top">
                <td>Cryptococcus neoformans</td>
                <td>—</td>
                <td>—</td>
                <td>Chryptococcus neoformans [AJ389]</td>
              </tr>
              <tr valign="top">
                <td>Unknown</td>
                <td>Ytzinger hernia [M197]</td>
                <td>Y-type appendicitis [V197]</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Unknown</td>
                <td>(The output was partly in Arabic) [L292]</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Unknown (Transaminase elevation is also not a disease name)</td>
                <td>Transaminitis elevation [N354]</td>
                <td>—</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Average Ranking</title>
        <p>In terms of average ranking, the scores were 5.25 (SD 4.16) for Gemini Advanced, 4.54 (SD 4.21) for Gemini, and 5.33 (SD 4.29) for Bard. The differences in average rankings were not statistically significant between Gemini Advanced and Gemini (<italic>P</italic>=.99), between Gemini Advanced and Bard (<italic>P</italic>=.17), and between Gemini and Bard (<italic>P</italic>=.99).</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In the following, we discuss our principal findings. Our findings indicate that Gemini demonstrated superior diagnostic accuracy compared with Bard, not only within the top 10 differential diagnosis lists but also in identifying the most likely diagnosis. Specifically, Gemini’s diagnostic accuracy for the top 10 lists was 76.5% (300/392), compared to Bard’s 68.6% (269/392), with a statistically significant difference (<italic>P</italic>=.02). Moreover, as the top diagnosis, Gemini’s diagnostic accuracy was 42.6% (167/392) versus Bard’s 31.4% (123/392), also significant (<italic>P</italic>=.001). This enhancement in Gemini’s diagnostic performance may be attributed to its advanced algorithmic framework, which likely incorporates more nuanced medical data and learns from recent case inputs, leading to more refined diagnostic predictions.</p>
        <p>However, Gemini did not show a statistically significant advantage within the top 5 differential diagnosis lists. This outcome may suggest that while Gemini’s algorithm is effective in a broader exploratory context, its precision may falter when constrained to a narrower list of top diagnoses. This indicates that a balance between breadth of exploration and depth of focus is crucial for optimizing diagnostic accuracy in such AI systems.</p>
        <p>Conversely, our analysis showed that Gemini Advanced did not perform as well as expected when compared with Gemini. Despite expectations that the advanced model would provide enhanced diagnostic capabilities, it achieved lower accuracy in identifying the most probable diagnosis with 31.6% (124/392) compared to Gemini’s 42.6% (167/392), with this result being statistically significant (<italic>P</italic>=.002). This outcome suggests that the additional features or complexity added in Gemini Advanced may not necessarily translate into improved diagnostic performance. These findings underscore the need for further refinement and optimization of Gemini Advanced to harness its potential for future AI-enhanced diagnostics.</p>
        <p>In addition, our analysis identified issues with inappropriate disease naming in the outputs from GAI systems, with Gemini Advanced and Gemini producing outdated or misspelled terms for vasculitis, instead of using the updated name. These inaccuracies highlight the challenges in ensuring up-to-date and precise medical terminology in AI outputs, which is crucial for maintaining trust and reliability in AI-assisted diagnostics. Furthermore, these misspellings are often found in published medical articles, suggesting that GAIs may have learned these errors from these sources. The fact that both Gemini Advanced and Gemini exhibited the same mistakes indicates potential similarities in their underlying models or training data.</p>
        <p>Regarding average rankings, there were no statistically significant differences among generative AI systems. This indicates a level of parity in how each model ranks diagnoses when they include the correct diagnosis, suggesting that while there are differences in overall accuracy, the ranking mechanisms of each model are relatively similar.</p>
        <p>Given the current performance metrics, our analysis supports prioritizing the adjustment and enhancement of Gemini for future applications in medical diagnostics, rather than Gemini Advanced. Despite the theoretically superior capabilities of Gemini Advanced [<xref ref-type="bibr" rid="ref17">17</xref>], Gemini’s framework appears more aligned with practical diagnostic needs and shows greater promise in real-world applications. However, it is essential to verify this trend across a variety of sources to ensure that these findings are not specific to the data sets used in this study. Further investigations involving diverse clinical environments and different types of medical data are crucial to confirm the consistency and reliability of Gemini’s superior performance.</p>
        <p>Finally, the comparative analysis of the differentials by Gemini Advanced, Gemini, and Bard revealed consistent inclusion of sepsis, pneumonia, pulmonary embolism, lymphoma, and meningitis among their top 10 differentials. This underscores not only a shared prioritization of these conditions but also the effectiveness of these systems in recognizing critical and prevalent diseases. The consistent identification of sepsis, particularly its second-place ranking by Bard, underscores the potential of these AI systems to enhance diagnostic accuracy and reduce errors in the identification of life-threatening conditions [<xref ref-type="bibr" rid="ref28">28</xref>]. Importantly, the top 3 differentials by Gemini Advanced and Gemini—sepsis, pneumonia, and pulmonary embolism—are among the most harmful diseases where reducing diagnostic errors is crucial [<xref ref-type="bibr" rid="ref1">1</xref>]. This suggests a potential for GAI systems to alert medical professionals about the inclusion of these important diseases during diagnosis. Such an understanding could facilitate more effective use of these GAI systems in future diagnostic processes.</p>
      </sec>
      <sec>
        <title>Strengths</title>
        <p>This study had several strengths. First, it directly compared 3 cutting-edge AI systems and demonstrated the dynamic improvements in their diagnostic accuracy. Unlike some CDSSs like symptom checkers, whose performance has plateaued [<xref ref-type="bibr" rid="ref29">29</xref>], the GAI systems evaluated in this research show considerable enhancements with each iteration. Second, we evaluated the diagnostic accuracy of GAI systems using a series of case reports. These case reports often describe rare diseases and atypical presentations, as opposed to common diseases and typical presentations [<xref ref-type="bibr" rid="ref30">30</xref>]. This showcases the systems’ diagnostic capabilities under challenging conditions. Third, the comprehensive range of medical conditions covered by the differential diagnosis lists generated by the AI systems represents a significant strength of this study. This extensive coverage demonstrates the systems’ capacity to handle a broad spectrum of medical knowledge and their applicability to various clinical scenarios.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Several limitations should be discussed. First, the use of case report series might not fully reflect real-world clinical scenarios. This limitation arises because case reports typically focus on novel or rare aspects of diseases rather than typical presentations and common diseases [<xref ref-type="bibr" rid="ref30">30</xref>]. Second, the exclusive use of a single case report journal could introduce selection bias. Third, there was no well-established method for evaluating AI diagnostics. In our study, we used binary evaluation methods. In contrast, other research on CDSSs used several rating methods [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>] and the ranking averages in the differential diagnosis lists [<xref ref-type="bibr" rid="ref33">33</xref>]. Fourth, we used only text data; excluding image data could influence the diagnostic performance. These factors limit the generalizability of these findings.</p>
        <p>Concerning the GAI systems, none of the platforms used in this study was designed for clinical use or has received approval for medical diagnostics. These systems were not specifically reinforced or enhanced for medical diagnostic purposes. According to a preprint, Med-Gemini, a specialized model in medicine, was developed [<xref ref-type="bibr" rid="ref34">34</xref>] but is not available to the public. In addition, we could not include all currently available GAI systems; thus, these findings cannot be generalized to other systems or different clinical scenarios. There was also a risk that these GAI systems may have learned from the published case reports used in this study.</p>
        <p>Moreover, the use of user data to refine models, as seen in Gemini Advanced and Gemini, highlights significant privacy concerns [<xref ref-type="bibr" rid="ref35">35</xref>]. Future research should address the development of locally deployable LLM solutions tailored specifically for CDSS. Although our data set is sourced from an open journal, careful consideration must be given to the ethical deployment of these models within health care settings. Finally, given the rapid pace of GAI technology development, such as the evolution from Bard to Gemini and from ChatGPT-3 to ChatGPT-4 and ChatGPT-4o, our findings may have a limited shelf-life.</p>
      </sec>
      <sec>
        <title>Future Direction</title>
        <p>Future research will aim to explore the diagnostic accuracy of GAI systems following medical enhancements and adjustments. Once these systems are approved for medical use, it will also be essential to investigate their performance across various populations and settings, including remote medical consultations, to ensure their effectiveness in real-world diagnostics. Moreover, assessing the impact of AI-enhanced diagnostics on the decision-making process of medical professionals will be crucial.</p>
        <p>In addition, future studies should focus on integrating GAI systems with existing electronic health record systems to understand how AI can augment data accessibility and analysis. This integration will be essential to evaluate how GAI can improve clinical workflows, reduce the cognitive burden and the time to diagnosis, and enhance patient outcomes.</p>
        <p>Finally, the development of ethical guidelines and governance frameworks for the use of GAI in diagnostics is imperative [<xref ref-type="bibr" rid="ref36">36</xref>]. As AI technologies become more prevalent in health care, it is crucial to establish clear protocols that safeguard patient privacy, ensure data security, and maintain transparency in AI decision-making processes.</p>
      </sec>
      <sec>
        <title>Comparison With Previous Work</title>
        <p>Our research builds on previous findings. We revealed that the diagnostic accuracy of ChatGPT-4 was 86.7% (340/392) for the final diagnoses included in the top 10 differential diagnosis lists, and 54.6% (214/392) for the top diagnosis [<xref ref-type="bibr" rid="ref37">37</xref>]. ChatGPT-4’s performance was still higher than that of Gemini in the lists (76.5% vs 86.7%) and as a top diagnosis (42.6% vs 54.6%); the comparison with Gemini Advanced showed a similar pattern in the lists (73.0% vs 86.7%) and as a top diagnosis (31.6% vs 54.6%).</p>
        <p>Expanding our findings, another study showed that Isabel Pro, a successful CDSS developed by Isabel Healthcare, Ltd [<xref ref-type="bibr" rid="ref38">38</xref>], correctly identified diagnoses in 87.1% (175/201) of cases, compared with 82.1% (165/201) for ChatGPT-4 in a series of clinical cases [<xref ref-type="bibr" rid="ref33">33</xref>]. These findings are partly attributed to the earlier launch of Isabel Pro and the ChatGPT series, allowing them to receive more user feedback and undergo updates to improve performance.</p>
        <p>In addition, another study, which focused on multiple-choice questions on clinical vignettes, revealed that ChatGPT-4 achieved a high accuracy rate of 73.3% for Clinical Challenges from the <italic>Journal of the American Medical Association</italic> (<italic>JAMA</italic>) and 88.7% for Image Challenges from the <italic>New England Journal of Medicine</italic> (<italic>NEJM</italic>). In contrast, Gemini, referred to as Gemini Pro in that study, achieved 63.6% for Clinical Challenges from <italic>JAMA</italic> and 68.7% for Image Challenges from the <italic>NEJM</italic> [<xref ref-type="bibr" rid="ref22">22</xref>]. While these previous findings and current results revealed certain diagnostic performances of generative AI systems, comparing these results poses significant challenges due to methodological differences. Variations stem from differences in data set preparation, the types of clinical vignettes used, and the specific challenges or images included, which may influence performance outcomes. In addition, the evaluation criteria used to assess accuracy might differ significantly, affecting the comparability. For instance, the scoring systems or the definitions of a “correct” answer could vary, necessitating caution when drawing direct comparisons between these findings and those of this study.</p>
        <p>In contrast to the serial evaluation approach of a symptom checker [<xref ref-type="bibr" rid="ref29">29</xref>], which demonstrated an accuracy of 44.3% (97/219) in the first year and 47.7% (43/90) in the third year without significant difference, the performance of generative AI systems presents a different dynamic. Specifically, the serial evaluation of generative AI indicated that Gemini outperformed Bard over a relatively short period. This superiority can be attributed in part to the adaptability of generative AI systems to incorporate additional data. However, it is crucial to note that this adaptability does not consistently translate into improved diagnostic accuracy, as evidenced by the current comparison between Gemini Advanced and Bard. This observation highlights the nuanced interplay between technological advancement and clinical efficacy, underscoring the need for continued research and validation in integrating these systems into medical practice effectively.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The results of this study suggest that Gemini outperformed Bard in diagnostic accuracy following the model update. However, Gemini Advanced requires further refinement to optimize its performance for future AI-enhanced diagnostics. These findings should be interpreted cautiously and considered primarily for research purposes, as these GAI systems have been neither adjusted for medical diagnostics nor approved for clinical use. The potential and limitations highlighted by this study underscore the need for ongoing development and evaluation of GAI systems within medical diagnostics.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>The PubMed search keywords.</p>
        <media xlink:href="medinform_v12i1e63010_app1.docx" xlink:title="DOCX File, 19 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>The data set of differential diagnosis generated by artificial intelligence systems in this study, alongside the final diagnosis.</p>
        <media xlink:href="medinform_v12i1e63010_app2.xlsx" xlink:title="XLSX File (Microsoft Excel File), 185 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CDSS</term>
          <def>
            <p>clinical decision support system</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">GAI</term>
          <def>
            <p>generative artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">JAMA</term>
          <def>
            <p>Journal of the American Medical Association</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LaMDA</term>
          <def>
            <p>language model for dialogue applications</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NEJM</term>
          <def>
            <p>New England Journal of Medicine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was funded by JSPS KAKENHI (grant 22K10421). This study was conducted using resources from the Department of Diagnostics and Generalist Medicine at Dokkyo Medical University.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>TH, YH, KT, TI, T Suzuki, and T Shimizu contributed to the study concept and design. TH performed the statistical analyses. TH contributed to the drafting of the manuscript. YH, KT, TI, T Suzuki, and T Shimizu contributed to the critical revision of the manuscript for relevant intellectual content. All the authors have read and approved the final version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Newman-Toker</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Nassery</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Schaffer</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Yu-Moe</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Clemens</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Saber Tehrani</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Fanai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hassoon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Siegal</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Burden of serious harms from diagnostic error in the USA</article-title>
          <source>BMJ Qual Saf</source>
          <year>2024</year>
          <volume>33</volume>
          <issue>2</issue>
          <fpage>109</fpage>
          <lpage>120</lpage>
          <pub-id pub-id-type="doi">10.1136/bmjqs-2021-014130</pub-id>
          <pub-id pub-id-type="medline">37460118</pub-id>
          <pub-id pub-id-type="pii">bmjqs-2021-014130</pub-id>
          <pub-id pub-id-type="pmcid">PMC10792094</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Connor</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Dhaliwal</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Five strategies for clinicians to advance diagnostic excellence</article-title>
          <source>BMJ</source>
          <year>2022</year>
          <volume>376</volume>
          <fpage>e068044</fpage>
          <pub-id pub-id-type="doi">10.1136/bmj-2021-068044</pub-id>
          <pub-id pub-id-type="medline">35172968</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Balogh</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <source>Improving Diagnosis in Health Care</source>
          <year>2015</year>
          <publisher-loc>Washington DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Pincock</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Baumgart</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Sadowski</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Fedorak</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Kroeker</surname>
              <given-names>KI</given-names>
            </name>
          </person-group>
          <article-title>An overview of clinical decision support systems: benefits, risks, and strategies for success</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <volume>3</volume>
          <fpage>17</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-0221-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0221-y</pub-id>
          <pub-id pub-id-type="medline">32047862</pub-id>
          <pub-id pub-id-type="pii">221</pub-id>
          <pub-id pub-id-type="pmcid">PMC7005290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kawamoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Houlihan</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Balas</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Lobach</surname>
              <given-names>DF</given-names>
            </name>
          </person-group>
          <article-title>Improving clinical practice using clinical decision support systems: a systematic review of trials to identify features critical to success</article-title>
          <source>BMJ</source>
          <year>2005</year>
          <volume>330</volume>
          <issue>7494</issue>
          <fpage>765</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/15767266"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.38398.500764.8F</pub-id>
          <pub-id pub-id-type="medline">15767266</pub-id>
          <pub-id pub-id-type="pii">bmj.38398.500764.8F</pub-id>
          <pub-id pub-id-type="pmcid">PMC555881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bright</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dhurjati</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bristow</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bastian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Coeytaux</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Samsa</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hasselblad</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Musty</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Wing</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kendrick</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Sanders</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Lobach</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Effect of clinical decision-support systems: a systematic review</article-title>
          <source>Ann Intern Med</source>
          <year>2012</year>
          <volume>157</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.acpjournals.org/doi/abs/10.7326/0003-4819-157-1-201207030-00450?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.7326/0003-4819-157-1-201207030-00450</pub-id>
          <pub-id pub-id-type="medline">22751758</pub-id>
          <pub-id pub-id-type="pii">1206700</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sibbald</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Monteiro</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sherbino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>LoGiudice</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Should electronic differential diagnosis support be used early or late in the diagnostic process? A multicentre experimental study of Isabel</article-title>
          <source>BMJ Qual Saf</source>
          <year>2022</year>
          <volume>31</volume>
          <issue>6</issue>
          <fpage>426</fpage>
          <lpage>433</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://qualitysafety.bmj.com/lookup/pmidlookup?view=long&#38;pmid=34611040"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjqs-2021-013493</pub-id>
          <pub-id pub-id-type="medline">34611040</pub-id>
          <pub-id pub-id-type="pii">bmjqs-2021-013493</pub-id>
          <pub-id pub-id-type="pmcid">PMC9132870</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Baalen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Boon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Verhoef</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>From clinical decision support to clinical reasoning support systems</article-title>
          <source>J Eval Clin Pract</source>
          <year>2021</year>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>520</fpage>
          <lpage>528</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33554432"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jep.13541</pub-id>
          <pub-id pub-id-type="medline">33554432</pub-id>
          <pub-id pub-id-type="pmcid">PMC8248191</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Riches</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Panagioti</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cheraghi-Sohi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Esmail</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bower</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The effectiveness of electronic differential diagnoses (DDX) generators: a systematic review and meta-analysis</article-title>
          <source>PLoS One</source>
          <year>2016</year>
          <volume>11</volume>
          <issue>3</issue>
          <fpage>e0148991</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0148991"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0148991</pub-id>
          <pub-id pub-id-type="medline">26954234</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-38539</pub-id>
          <pub-id pub-id-type="pmcid">PMC4782994</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmieding</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Kopka</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz-Niethammer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Balzer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Feufel</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Triage accuracy of symptom checker apps: 5-year follow-up evaluation</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>e31810</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/5/e31810/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/31810</pub-id>
          <pub-id pub-id-type="medline">35536633</pub-id>
          <pub-id pub-id-type="pii">v24i5e31810</pub-id>
          <pub-id pub-id-type="pmcid">PMC9131144</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castaneda</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nalley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mannion</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharyya</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Blake</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pecora</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Suh</surname>
              <given-names>KS</given-names>
            </name>
          </person-group>
          <article-title>Clinical decision support systems for improving diagnostic accuracy and achieving precision medicine</article-title>
          <source>J Clin Bioinforma</source>
          <year>2015</year>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>4</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jclinbioinformatics.biomedcentral.com/articles/10.1186/s13336-015-0019-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13336-015-0019-3</pub-id>
          <pub-id pub-id-type="medline">25834725</pub-id>
          <pub-id pub-id-type="pii">19</pub-id>
          <pub-id pub-id-type="pmcid">PMC4381462</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Khuntia</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Parameswaran</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Meyers</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Preliminary evidence of the use of generative AI in health care clinical services: systematic narrative review</article-title>
          <source>JMIR Med Inform</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>e52073</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2024/1/e52073/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/52073</pub-id>
          <pub-id pub-id-type="medline">38506918</pub-id>
          <pub-id pub-id-type="pii">v12i1e52073</pub-id>
          <pub-id pub-id-type="pmcid">PMC10993141</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gaur</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sai</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chamola</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Guizani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rodrigues</surname>
              <given-names>JJPC</given-names>
            </name>
          </person-group>
          <article-title>Generative AI for transformative healthcare: a comprehensive study of emerging models, applications, case studies, and limitations</article-title>
          <source>IEEE Access</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>31078</fpage>
          <lpage>31106</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2024.3367715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>WX</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Min</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>A survey of large language models</article-title>
          <source>Computer Science Computation and Language</source>
          <year>2023</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2303.18223"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karampinis</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Toli</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Georgopoulou</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Kampra</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Spyridonidou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Roussaki Schulze</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Zafiriou</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Can artificial intelligence "Hold" a dermoscope?—The evaluation of an artificial intelligence chatbot to translate the dermoscopic language</article-title>
          <source>Diagnostics (Basel)</source>
          <year>2024</year>
          <volume>14</volume>
          <issue>11</issue>
          <fpage>1165</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=diagnostics14111165"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/diagnostics14111165</pub-id>
          <pub-id pub-id-type="medline">38893694</pub-id>
          <pub-id pub-id-type="pii">diagnostics14111165</pub-id>
          <pub-id pub-id-type="pmcid">PMC11171543</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pichai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hassabis</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Introducing Gemini: our largest and most capable AI model</article-title>
          <source>Google</source>
          <year>2023</year>
          <access-date>2023-12-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://blog.google/technology/ai/google-gemini-ai/">https://blog.google/technology/ai/google-gemini-ai/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Gemini Team</collab>
            <name name-style="western">
              <surname>Anil</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Borgeaud</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Alayrac</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Soricut</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schalkwyk</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Gemini: a family of highly capable multimodal models</article-title>
          <source>Computer Science Computation and Language</source>
          <year>2023</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2312.11805"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>OY</given-names>
            </name>
            <name name-style="western">
              <surname>Connolly</surname>
              <given-names>ID</given-names>
            </name>
            <name name-style="western">
              <surname>Fridley</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Zadnik Sullivan</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Cielo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Oyelese</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Doberstein</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Telfeian</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Gokaslan</surname>
              <given-names>ZL</given-names>
            </name>
            <name name-style="western">
              <surname>Asaad</surname>
              <given-names>WF</given-names>
            </name>
          </person-group>
          <article-title>Performance of ChatGPT, GPT-4, and Google Bard on a neurosurgery oral boards preparation question bank</article-title>
          <source>Neurosurgery</source>
          <year>2023</year>
          <volume>93</volume>
          <issue>5</issue>
          <fpage>1090</fpage>
          <lpage>1098</lpage>
          <pub-id pub-id-type="doi">10.1227/neu.0000000000002551</pub-id>
          <pub-id pub-id-type="pii">00006123-990000000-00775</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doshi</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Amin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Khosla</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bajaj</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chheang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Forman</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Utilizing large language models to simplify radiology reports: a comparative analysis of ChatGPT-3.5, ChatGPT-4.0, Google Bard, and Microsoft Bing</article-title>
          <source>medRxiv</source>
          <year>2023</year>
          <fpage>23290786</fpage>
          <pub-id pub-id-type="doi">10.1101/2023.06.04.23290786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amin</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Mayes</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Khosla</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Assessing the efficacy of large language models in health literacy: a comprehensive cross-sectional study</article-title>
          <source>Yale J Biol Med</source>
          <year>2024</year>
          <volume>97</volume>
          <issue>1</issue>
          <fpage>17</fpage>
          <lpage>27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38559461"/>
          </comment>
          <pub-id pub-id-type="doi">10.59249/ZTOZ1966</pub-id>
          <pub-id pub-id-type="medline">38559461</pub-id>
          <pub-id pub-id-type="pii">yjbm97117</pub-id>
          <pub-id pub-id-type="pmcid">PMC10964816</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hirosawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mizuta</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Harada</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Comparative evaluation of diagnostic accuracy between Google Bard and physicians</article-title>
          <source>Am J Med</source>
          <year>2023</year>
          <volume>136</volume>
          <issue>11</issue>
          <fpage>1119</fpage>
          <lpage>1123.e18</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amjmed.2023.08.003</pub-id>
          <pub-id pub-id-type="medline">37643659</pub-id>
          <pub-id pub-id-type="pii">S0002-9343(23)00536-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Bressem</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Busch</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Nebelung</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Truhn</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Comparative analysis of multimodal large language model performance on clinical vignette questions</article-title>
          <source>JAMA</source>
          <year>2024</year>
          <volume>331</volume>
          <issue>15</issue>
          <fpage>1320</fpage>
          <lpage>1321</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2023.27861</pub-id>
          <pub-id pub-id-type="medline">38497956</pub-id>
          <pub-id pub-id-type="pii">2816270</pub-id>
          <pub-id pub-id-type="pmcid">PMC10949144</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Graber</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Mathew</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Performance of a web-based clinical diagnosis support system for internists</article-title>
          <source>J Gen Intern Med</source>
          <year>2008</year>
          <volume>23</volume>
          <issue>Suppl 1</issue>
          <fpage>37</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18095042"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-007-0271-8</pub-id>
          <pub-id pub-id-type="medline">18095042</pub-id>
          <pub-id pub-id-type="pmcid">PMC2150633</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shahrudin</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Mohamed-Yassin</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Nik Mohd Nasir</surname>
              <given-names>NM</given-names>
            </name>
          </person-group>
          <article-title>Herpes zoster following COVID-19 vaccine booster</article-title>
          <source>Am J Case Rep</source>
          <year>2023</year>
          <volume>24</volume>
          <fpage>e938667</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://amjcaserep.com/download/index/idArt/938667"/>
          </comment>
          <pub-id pub-id-type="doi">10.12659/AJCR.938667</pub-id>
          <pub-id pub-id-type="medline">36650730</pub-id>
          <pub-id pub-id-type="pii">938667</pub-id>
          <pub-id pub-id-type="pmcid">PMC9874951</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krupat</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wormwood</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartzstein</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Avoiding premature closure and reaching diagnostic accuracy: some key predictive factors</article-title>
          <source>Med Educ</source>
          <year>2017</year>
          <volume>51</volume>
          <issue>11</issue>
          <fpage>1127</fpage>
          <lpage>1137</lpage>
          <pub-id pub-id-type="doi">10.1111/medu.13382</pub-id>
          <pub-id pub-id-type="medline">28857266</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fleiss</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Paik</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <source>Statistical Methods for Rates and Proportions</source>
          <year>2003</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>John Wiley &#38; Sons</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Falk</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gross</surname>
              <given-names>WL</given-names>
            </name>
            <name name-style="western">
              <surname>Guillevin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jayne</surname>
              <given-names>DRW</given-names>
            </name>
            <name name-style="western">
              <surname>Jennette</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Kallenberg</surname>
              <given-names>CGM</given-names>
            </name>
            <name name-style="western">
              <surname>Luqmani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mahr</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Matteson</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Merkel</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Specks</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Granulomatosis with polyangiitis (Wegener's): an alternative name for Wegener's granulomatosis</article-title>
          <source>Ann Rheum Dis</source>
          <year>2011</year>
          <volume>70</volume>
          <issue>4</issue>
          <fpage>704</fpage>
          <pub-id pub-id-type="doi">10.1136/ard.2011.150714</pub-id>
          <pub-id pub-id-type="medline">21372195</pub-id>
          <pub-id pub-id-type="pii">70/4/704</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rhee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dantes</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Epstein</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Seymour</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Iwashyna</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kadri</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Angus</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Danner</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Fiore</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Jernigan</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Septimus</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Warren</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Karcz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Menchaca</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gruber</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Klompas</surname>
              <given-names>M</given-names>
            </name>
            <collab>CDC Prevention Epicenter Program</collab>
          </person-group>
          <article-title>Incidence and trends of sepsis in US hospitals using clinical vs claims data, 2009-2014</article-title>
          <source>JAMA</source>
          <year>2017</year>
          <volume>318</volume>
          <issue>13</issue>
          <fpage>1241</fpage>
          <lpage>1249</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28903154"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2017.13836</pub-id>
          <pub-id pub-id-type="medline">28903154</pub-id>
          <pub-id pub-id-type="pii">2654187</pub-id>
          <pub-id pub-id-type="pmcid">PMC5710396</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harada</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sakamoto</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sugimoto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Longitudinal changes in diagnostic accuracy of a differential diagnosis list developed by an AI-based symptom checker: retrospective observational study</article-title>
          <source>JMIR Form Res</source>
          <year>2024</year>
          <volume>8</volume>
          <fpage>e53985</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2024/1/e53985/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/53985</pub-id>
          <pub-id pub-id-type="medline">38758588</pub-id>
          <pub-id pub-id-type="pii">v8i1e53985</pub-id>
          <pub-id pub-id-type="pmcid">PMC11143391</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Barber</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Kienle</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>von Schoen-Angerer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tugwell</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kiene</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Helfand</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Sox</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Werthmann</surname>
              <given-names>PG</given-names>
            </name>
            <name name-style="western">
              <surname>Moher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rison</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Shamseer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Koch</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Hanaway</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sudak</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Kaszkin-Bettag</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carpenter</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Gagnier</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>CARE guidelines for case reports: explanation and elaboration document</article-title>
          <source>J Clin Epidemiol</source>
          <year>2017</year>
          <volume>89</volume>
          <fpage>218</fpage>
          <lpage>235</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2017.04.026</pub-id>
          <pub-id pub-id-type="medline">28529185</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(17)30037-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kanjee</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Crowe</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rodman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of a generative artificial intelligence model in a complex diagnostic challenge</article-title>
          <source>JAMA</source>
          <year>2023</year>
          <volume>330</volume>
          <issue>1</issue>
          <fpage>78</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37318797"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2023.8288</pub-id>
          <pub-id pub-id-type="medline">37318797</pub-id>
          <pub-id pub-id-type="pii">2806457</pub-id>
          <pub-id pub-id-type="pmcid">PMC10273128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bond</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Weaver</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Levick</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Giuliano</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Graber</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Differential diagnosis generators: an evaluation of currently available computer programs</article-title>
          <source>J Gen Intern Med</source>
          <year>2012</year>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>213</fpage>
          <lpage>219</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21789717"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-011-1804-8</pub-id>
          <pub-id pub-id-type="medline">21789717</pub-id>
          <pub-id pub-id-type="pmcid">PMC3270234</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bridges</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Computerized diagnostic decision support systems - a comparative performance study of Isabel Pro vs. ChatGPT4</article-title>
          <source>Diagnosis (Berl)</source>
          <year>2024</year>
          <volume>11</volume>
          <issue>3</issue>
          <fpage>250</fpage>
          <lpage>258</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.degruyter.com/document/doi/10.1515/dx-2024-0033"/>
          </comment>
          <pub-id pub-id-type="doi">10.1515/dx-2024-0033</pub-id>
          <pub-id pub-id-type="medline">38709491</pub-id>
          <pub-id pub-id-type="pii">dx-2024-0033</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saab</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Tanno</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stutz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wulczyn</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Strother</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Capabilities of Gemini models in medicine</article-title>
          <source>Computer Science Artificial Intelligence</source>
          <year>2024</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2404.18416"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <source>Gemini apps privacy notice</source>
          <access-date>2024-05-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://support.google.com/gemini/answer/13594961#privacy_notice">https://support.google.com/gemini/answer/13594961#privacy_notice</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Newman-Toker</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Sharfstein</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>The role for policy in AI-assisted medical diagnosis</article-title>
          <source>JAMA Health Forum</source>
          <year>2024</year>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>e241339</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/article.aspx?doi=10.1001/jamahealthforum.2024.1339"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamahealthforum.2024.1339</pub-id>
          <pub-id pub-id-type="medline">38635262</pub-id>
          <pub-id pub-id-type="pii">2818016</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hirosawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Harada</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mizuta</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sakamoto</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tokumasu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Diagnostic performance of generative artificial intelligences for a series of complex case reports</article-title>
          <source>Digit Health</source>
          <year>2024</year>
          <volume>10</volume>
          <pub-id pub-id-type="doi">10.1177/20552076241265215</pub-id>
          <pub-id pub-id-type="medline">39229463</pub-id>
          <pub-id pub-id-type="pmcid">PMC11369864</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>LY</given-names>
            </name>
          </person-group>
          <article-title>Isabel Pro</article-title>
          <source>J Can Health Libr Assoc</source>
          <year>2019</year>
          <volume>40</volume>
          <issue>2</issue>
          <fpage>63</fpage>
          <lpage>69</lpage>
          <pub-id pub-id-type="doi">10.29173/jchla29418</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
