<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="review-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v12i1e62924</article-id>
      <article-id pub-id-type="pmid">39374057</article-id>
      <article-id pub-id-type="doi">10.2196/62924</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Review</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Review</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Use of SNOMED CT in Large Language Models: Scoping Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhu</surname>
            <given-names>Shengxin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Triep</surname>
            <given-names>Karen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>van Mens</surname>
            <given-names>Hugo J. T.</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gaudet-Blavignac</surname>
            <given-names>Christophe</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>Eunsuk</given-names>
          </name>
          <degrees>MD, MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1350-3606</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Sung</surname>
            <given-names>Sumi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Nursing Science</institution>
            <institution>Research Institute of Nursing Science</institution>
            <institution>Chungbuk National University</institution>
            <addr-line>1 Chungdae-ro</addr-line>
            <addr-line>Seowon-gu</addr-line>
            <addr-line>Cheongju, 28644</addr-line>
            <country>Republic of Korea</country>
            <fax>82 43 266 1710</fax>
            <phone>82 43 249 1731</phone>
            <email>sumisung@cbnu.ac.kr</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3897-4698</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Republic of Korea Air Force Aerospace Medical Center</institution>
        <addr-line>Cheongju</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Nursing Science</institution>
        <institution>Research Institute of Nursing Science</institution>
        <institution>Chungbuk National University</institution>
        <addr-line>Cheongju</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Sumi Sung <email>sumisung@cbnu.ac.kr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>7</day>
        <month>10</month>
        <year>2024</year>
      </pub-date>
      <volume>12</volume>
      <elocation-id>e62924</elocation-id>
      <history>
        <date date-type="received">
          <day>4</day>
          <month>6</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>3</day>
          <month>7</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>22</day>
          <month>7</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>9</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Eunsuk Chang, Sumi Sung. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 07.10.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2024/1/e62924" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Large language models (LLMs) have substantially advanced natural language processing (NLP) capabilities but often struggle with knowledge-driven tasks in specialized domains such as biomedicine. Integrating biomedical knowledge sources such as SNOMED CT into LLMs may enhance their performance on biomedical tasks. However, the methodologies and effectiveness of incorporating SNOMED CT into LLMs have not been systematically reviewed.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This scoping review aims to examine how SNOMED CT is integrated into LLMs, focusing on (1) the types and components of LLMs being integrated with SNOMED CT, (2) which contents of SNOMED CT are being integrated, and (3) whether this integration improves LLM performance on NLP tasks.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Following the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) guidelines, we searched ACM Digital Library, ACL Anthology, IEEE Xplore, PubMed, and Embase for relevant studies published from 2018 to 2023. Studies were included if they incorporated SNOMED CT into LLM pipelines for natural language understanding or generation tasks. Data on LLM types, SNOMED CT integration methods, end tasks, and performance metrics were extracted and synthesized.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The review included 37 studies. Bidirectional Encoder Representations from Transformers and its biomedical variants were the most commonly used LLMs. Three main approaches for integrating SNOMED CT were identified: (1) incorporating SNOMED CT into LLM inputs (28/37, 76%), primarily using concept descriptions to expand training corpora; (2) integrating SNOMED CT into additional fusion modules (5/37, 14%); and (3) using SNOMED CT as an external knowledge retriever during inference (5/37, 14%). The most frequent end task was medical concept normalization (15/37, 41%), followed by entity extraction or typing and classification. Only about half of the included studies (19/37, 51%) provided direct performance comparisons before and after SNOMED CT integration; among these, most (17/19, 89%) reported performance improvements. The reported gains varied widely across different metrics and tasks, ranging from 0.87% to 131.66%. However, some studies showed either no improvement or a decline in certain performance metrics.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This review demonstrates diverse approaches for integrating SNOMED CT into LLMs, with a focus on using concept descriptions to enhance biomedical language understanding and generation. While the results suggest potential benefits of SNOMED CT integration, the lack of standardized evaluation methods and comprehensive performance reporting hinders definitive conclusions about its effectiveness. Future research should prioritize consistent reporting of performance comparisons and explore more sophisticated methods for incorporating SNOMED CT’s relational structure into LLMs. In addition, the biomedical NLP community should develop standardized evaluation frameworks to better assess the impact of ontology integration on LLM performance.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>SNOMED CT</kwd>
        <kwd>ontology</kwd>
        <kwd>knowledge graph</kwd>
        <kwd>large language models</kwd>
        <kwd>natural language processing</kwd>
        <kwd>language models</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The recent emergence of large language models (LLMs), exemplified by Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref1">1</xref>] and GPT [<xref ref-type="bibr" rid="ref2">2</xref>], has significantly advanced the capabilities of machines in natural language understanding (NLU) and natural language generation (NLG). Despite achieving state-of-the-art performance on a range of natural language processing (NLP) tasks, LLMs exhibit a deficiency in knowledge when confronted with knowledge-driven tasks [<xref ref-type="bibr" rid="ref3">3</xref>]. These models acquire factual information from extensive text corpora during training, embedding this knowledge implicitly within their numerous parameters and consequently posing challenges in terms of verification and manipulation [<xref ref-type="bibr" rid="ref4">4</xref>]. Moreover, numerous studies have demonstrated that LLMs struggle to recall facts and frequently encounter hallucinations, generating factually inaccurate statements [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. This poses a significant obstacle to the effective application of LLMs in critical scenarios, such as medical diagnosis and legal judgment [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>Efforts have been made to address the black box nature of LLMs and mitigate potential hallucination problems. Approaches include enhancing language model (LM) veracity through strategies such as retrieval chain-of-thought prompting [<xref ref-type="bibr" rid="ref8">8</xref>] and retrieval-augmented generation [<xref ref-type="bibr" rid="ref9">9</xref>]. Another significant avenue involves integrating knowledge graphs (KGs) or ontologies into LMs using triple relations or KG subgraphs [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. KGs, renowned for their excellence in representing knowledge within a domain, can provide answers when combined with LMs [<xref ref-type="bibr" rid="ref11">11</xref>], making them valuable for common sense–based reasoning and fact-checking models [<xref ref-type="bibr" rid="ref12">12</xref>]. However, LLMs often face challenges when trained and tested predominantly on general-domain datasets or KGs, such as Wikipedia and WordNet [<xref ref-type="bibr" rid="ref13">13</xref>], making it difficult to gauge their performance on datasets containing biomedical texts. The differing word distributions in general and biomedical corpora pose challenges for biomedical text mining models [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <p>Biomedicine-specific KGs may be a potential solution to the abovementioned problems. In the biomedical domain, KGs, also known as ontologies, are relatively abundant, with the Unified Medical Language System (UMLS) [<xref ref-type="bibr" rid="ref15">15</xref>] being one of the most frequently used ontologies [<xref ref-type="bibr" rid="ref16">16</xref>]. The UMLS serves as a thesaurus for biomedical terminology systems such as the Medical Subject Headings, International Classification of Diseases, Gene Ontology, Human Phenotype Ontology, and SNOMED CT, all curated and managed by the United States National Library of Medicine.</p>
        <p>Among UMLS member terminologies, SNOMED CT stands out as the most comprehensive biomedical ontology, encompassing a wide range of biomedical and clinical entities, including signs, symptoms, diseases, procedures, and social contexts [<xref ref-type="bibr" rid="ref17">17</xref>]. These entities are represented by concepts (clinical ideas), descriptions (human-readable terms linked to concepts), and relations (comprising hierarchical <italic>is-a</italic> relations and horizontal attribute relations). As SNOMED CT is increasingly integrated into electronic health record (EHR) systems, as required by the Fast Healthcare Interoperability Resource (FHIR) to ensure interoperability among health care institutions [<xref ref-type="bibr" rid="ref18">18</xref>], terminology servers supporting SNOMED CT have become ubiquitous. With its ready availability across health care institutions, SNOMED CT has gained attention as a knowledge source or ontology for representing biomedical and clinical knowledge [<xref ref-type="bibr" rid="ref17">17</xref>]. In this case, the abstract model of SNOMED CT is used to describe and store biomedical facts in a hierarchical and structured manner, readily available across health care institutions.</p>
        <p>Integrating SNOMED CT into LLMs holds significant potential for advancing various aspects of health care and biomedical research. By incorporating the comprehensive and structured biomedical knowledge from SNOMED CT, LLMs can better understand medical terminology, relationships between clinical concepts, and domain-specific context, potentially reducing errors and hallucinations when understanding or generating biomedical texts. This integration could enhance clinical decision support systems, improve the accuracy of automated coding and billing processes, facilitate more precise information retrieval from medical literature, and support the development of personalized medicine approaches. Furthermore, it may enable more accurate NLP of clinical notes and medical records, potentially leading to improved patient care and outcomes through better data analysis and insights.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>This scoping review aimed to examine the use of SNOMED CT as a knowledge source to be incorporated into LLMs, specifically focusing on the methodology of integrating these 2 modalities. This review sought to answer the following research questions: (1) What are the dominant types and components of LLMs being integrated with SNOMED CT? (2) Which contents of SNOMED CT (ie, descriptions, relations, or entity classes) are being integrated into LLMs? and (3) Does the integration of SNOMED CT into LLMs improve the performance on NLP tasks in terms of NLU and NLG? Answers to these questions could suggest future methodological approaches for more effectively integrating human-engineered knowledge into LLMs.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>This scoping review was guided by the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) framework, which outlines the recommended steps and reporting standards for conducting scoping reviews (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
      <sec>
        <title>Study Identification</title>
        <p>We defined LLMs as transformer-based LMs pretrained on large-scale corpora [<xref ref-type="bibr" rid="ref20">20</xref>] (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). Given that transformer-based models currently dominate in the field and are likely to continue doing so in the coming years, reviewing other LMs, such as recurrent neural networks and more conventional statistical models, does not hold scientific significance for current and future applications. Therefore, focusing on transformer-based models allows a more cohesive and in-depth analysis of the most relevant and cutting-edge techniques in the field.</p>
        <p>To explore scientific literature describing transformer-based models, we conducted our literature search on ACM Digital Library, ACL Anthology, IEEE Xplore, PubMed, and Embase on March 12, 2024, using the following query terms: (1) (“language *model” OR “pretrained *model” OR “language processing” OR “embedding”) AND (“SNOMED” OR “Unified Medical Language System” OR “UMLS” OR “*medical”) AND (“knowledge graph” OR “ontolog*” OR “knowledge*base” OR “knowledge infusion”) and (2) (“SNOMED”) AND (“large language model” OR “BERT” OR “GPT”). Queries were modified according to the bibliographic databases when necessary. Queries were designed to search for articles published from 2018 to 2023. The start date of the query was set to 2018 when BERT, the first transformer-based LM to gain widespread adoption, was introduced, marking the beginning of significant research into transformer-based LLMs.</p>
      </sec>
      <sec>
        <title>Study Selection</title>
        <p>Articles were extracted from ACM Digital Library, ACL Anthology, IEEE Xplore, PubMed, and Embase. Duplicates were removed, and 2 authors (SS and EC) examined the full text of the retrieved articles for the presence of the term “SNOMED.” We prioritized a full-text search first before title and abstract review because many potentially eligible papers do not explicitly mention “SNOMED” in their titles or abstracts. To be eligible for our review, articles had to have SNOMED CT incorporated into NLP pipelines, which encompass processes from text cleansing through pretraining and inference to model evaluation, specifically for tasks involving NLU and NLG. We then further excluded studies that met ≥1 of the following criteria: (1) published in languages other than English; (2) categorized as reviews, surveys, keynotes, or editorial articles; (3) did not incorporate SNOMED CT at any stage of the NLP pipeline; (4) aimed to create, develop, enrich, or enhance ontologies or graphs; (5) did not involve the processing of natural language (NL) text; or (6) solely used SNOMED CT codes for retrieving patients of interest from EHRs or for annotating instances with SNOMED CT codes as gold-standard target labels for LM training.</p>
      </sec>
      <sec>
        <title>Result Synthesis</title>
        <p>Through discussions and qualitative assessments, we analyzed the included articles according to the following characteristics: chronological and geographic publication trends, baseline LLM and its output, dataset used for training and testing the model, methods for integrating SNOMED CT into the LLM, and the model’s end task and performance (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>).</p>
        <boxed-text id="box1" position="float">
          <title>Methods for synthesizing the review.</title>
          <p>
            <bold>Synthesis of results</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Chronological and geographic publication trends</p>
            </list-item>
            <list-item>
              <p>Baseline large language model (LLM) and its output</p>
            </list-item>
            <list-item>
              <p>Dataset used for training and testing the model</p>
            </list-item>
            <list-item>
              <p>Methods for integrating SNOMED CT into the LLM (methodologies for knowledge graph [KG]–enhanced LLMs [<xref ref-type="bibr" rid="ref7">7</xref>])</p>
              <list list-type="bullet">
                <list-item>
                  <p>KG-enhanced LLM pretraining: works that apply KGs during the pretraining stage and improve the knowledge expression of LLMs</p>
                </list-item>
                <list-item>
                  <p>KG-enhanced LLM interpretability: works that use KGs to understand the knowledge learned by LLMs and interpret the reasoning process of LLMs</p>
                </list-item>
                <list-item>
                  <p>KG-enhanced LLM inference: research that uses KGs during the inference stage of LLMs, which enables LLMs to access the latest knowledge without retraining</p>
                </list-item>
              </list>
            </list-item>
            <list-item>
              <p>End task and performance</p>
              <list list-type="bullet">
                <list-item>
                  <p>End task natural language understanding: entity recognition or typing, entity or relation extraction, document classification, question answering (multiple choice), and inference</p>
                </list-item>
                <list-item>
                  <p>End task natural language generation: text summarization, question answering (short or essay answers), translation, and dialogue generation</p>
                </list-item>
                <list-item>
                  <p>Performance analysis: nominal percentage gains in performance after SNOMED CT integration</p>
                </list-item>
              </list>
            </list-item>
          </list>
        </boxed-text>
        <p>We elucidated the methodology for incorporating SNOMED CT into NLP pipelines following the categorization methods previously outlined by Pan et al [<xref ref-type="bibr" rid="ref7">7</xref>]. These methods categorized methodologies for KG-enhanced LLMs into three distinctive types: (1) KG-enhanced LLM pretraining, (2) KG-enhanced LLM interpretability, and (3) KG-enhanced LLM inference. The end tasks of LLMs after SNOMED CT integration included NLU and NLG. Regarding the performance analysis, we presented the nominal percentage gains in performance after SNOMED CT integration without analyzing their statistical significance, as most studies did not perform statistical significance testing. We refrained from conducting direct study-to-study comparisons due to concerns about the heterogeneity of testing corpora and evaluation metrics across different studies.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Selected Papers</title>
        <p>The query yielded 876 articles from the 5 bibliographic databases, with 634 (72.4%) obtained from the first query and 242 (27.6%) from the second query (<xref rid="figure1" ref-type="fig">Figure 1</xref>). After the removal of duplicates, 812 (92.7%) articles were reviewed to check whether the term “SNOMED” was mentioned in their full texts. A total of 325 (37.1%) articles were then reviewed according to the inclusion and exclusion criteria. Consequently, 37 (4.2%) publications were finally selected for the scoping review (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The characteristics of the individual papers and other features, including the language of used datasets and SNOMED CT descriptions, other ontologies used, and the types of entities represented by SNOMED CT, are detailed in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flow diagram of article selection. SCT: SNOMED CT.</p>
          </caption>
          <graphic xlink:href="medinform_v12i1e62924_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Chronological and Geographic Publication Trends</title>
        <p><xref ref-type="table" rid="table1">Table 1</xref> presents the publication trends noted in the review. Although our literature search covered publications from 2018 onward, no studies published in 2018 were included in the final review. The largest volume of studies was published in 2022 (13/37, 35%), followed by those published in 2020 (10/37, 27%).</p>
        <p>When the number of countries was counted according to the first authors’ institutional affiliations, the largest number of studies was noted to originate from the United States (10/37, 27%). While most of the studies (26/37, 70%) were conducted in countries that are members of SNOMED International, some were performed in nonmember countries such as Bulgaria and China, where separate license fees and in-house translation of SNOMED CT descriptions to the local language were required.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Chronological and geographic publication trends among the included studies.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Study characteristics</td>
                <td>Studies</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Publication year</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2019</td>
                <td>[<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2020</td>
                <td>[<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2021</td>
                <td>[<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2022</td>
                <td>[<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref49">49</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2023</td>
                <td>[<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref57">57</xref>]</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Countries</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Australia</td>
                <td>[<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bulgaria</td>
                <td>[<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Canada</td>
                <td>[<xref ref-type="bibr" rid="ref55">55</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>China (including Hong Kong)</td>
                <td>[<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Germany</td>
                <td>[<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>India</td>
                <td>[<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Israel</td>
                <td>[<xref ref-type="bibr" rid="ref53">53</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Spain</td>
                <td>[<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>United Kingdom</td>
                <td>[<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>United States</td>
                <td>[<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Publication type</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Journal paper</td>
                <td>[<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref55">55</xref>-<xref ref-type="bibr" rid="ref57">57</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Conference paper</td>
                <td>[<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>-<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref51">51</xref>-<xref ref-type="bibr" rid="ref54">54</xref>]</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Baseline LLMs and Their Outputs</title>
        <p>Most of the included studies (27/37, 73%) used BERT and its variants as the baseline LLMs for NLU and NLG tasks. Variants such as RoBERTa [<xref ref-type="bibr" rid="ref58">58</xref>] and ALBERT [<xref ref-type="bibr" rid="ref59">59</xref>] were also used to address BERT’s relatively small training corpora and long training time [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. To overcome the limited applicability of these general-purpose LLMs to biomedical texts, many studies (13/37, 35%) used LLMs trained on large-scale biomedical corpora, such as BioBERT [<xref ref-type="bibr" rid="ref14">14</xref>] and PubMedBERT [<xref ref-type="bibr" rid="ref60">60</xref>], which were trained on PubMed articles, and ClinicalBERT [<xref ref-type="bibr" rid="ref61">61</xref>] and EHRBERT [<xref ref-type="bibr" rid="ref23">23</xref>], which were trained on clinical notes. SapBERT [<xref ref-type="bibr" rid="ref62">62</xref>], initialized by PubMedBERT, was further fine-tuned using contrastive learning with UMLS synonyms to better accommodate SNOMED CT synonym descriptions [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. To support biomedical NLP tasks in languages other than English, LLMs trained on corpora in those languages were also adopted, such as medBERT.de [<xref ref-type="bibr" rid="ref63">63</xref>], designed specifically for the German medical domain [<xref ref-type="bibr" rid="ref51">51</xref>], and ERNIE-health, pretrained from Chinese medical records [<xref ref-type="bibr" rid="ref41">41</xref>]. Aside from these BERT-based models, GPT emerged as a new baseline LLM since 2023. 
Makhervaks et al [<xref ref-type="bibr" rid="ref53">53</xref>] used BioGPT [<xref ref-type="bibr" rid="ref64">64</xref>], whose decoder was pretrained on biomedical corpora, to enhance the generation of artificial sentences. In addition, Xu et al [<xref ref-type="bibr" rid="ref55">55</xref>] used GPT-3.5 for ranking suggested annotation terms in their study (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <p>A primary assertive role of LLMs was representing biomedical entities from text data. While most proposed methods produced embedding vectors to convey contextual information about the biomedical entities that appeared in texts, Kalyan and Sangeetha [<xref ref-type="bibr" rid="ref31">31</xref>] introduced a Siamese RoBERTa model to generate concept vectors from synonym relationships defined by SNOMED CT. These basic outputs of LLMs might undergo additional task-specific layers to perform desired end tasks, which will be discussed later. Beyond producing embedding representations of entities, some studies required LLMs to perform classification or ranking tasks after fine-tuning, predicting the most likely relevant standard concepts [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref55">55</xref>], entity types [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref51">51</xref>], sentences [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref53">53</xref>], or matched foreign language words, enabling machine translation [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. LLMs with encoder-decoder architectures, such as BART [<xref ref-type="bibr" rid="ref65">65</xref>], were used for dedicated NLG tasks [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref57">57</xref>].</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Large language models used in the included studies.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="0"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Base and fine-tuned models</td>
                <td>Studies</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>BERT<sup>a</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Vanilla BERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RoBERTa</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref50">50</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ALBERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref53">53</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ELECTRA</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref53">53</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DeBERTa</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref53">53</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>mBERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BioBERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ClinicalBERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>PubMedBERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SapBERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>EHRBERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref23">23</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SciBERT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref46">46</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BioELECTRA</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref53">53</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>German BERT models</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref51">51</xref>]</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>GPT</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GPT-3.5</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref55">55</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BioGPT</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref53">53</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BART</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref57">57</xref>]</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Transformer neural networks</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Transformer NMT<sup>b</sup> model</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Denoising autoencoder</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref32">32</xref>]</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>ERNIE<sup>c</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ERNIE-health</td>
                <td colspan="2">[<xref ref-type="bibr" rid="ref41">41</xref>]</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>NMT: neural machine translation.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>ERNIE: Enhanced Language Representation with Informative Entities.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Data for Training and Testing Models</title>
        <p>When using general-domain LLMs, authors deployed additional fine-tuning or pretraining on biomedical corpora to better adapt their models for biomedical NLP tasks. The pretraining corpora included PubMed or MEDLINE articles [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] and other publicly available datasets, such as Wikipedia articles [<xref ref-type="bibr" rid="ref29">29</xref>] and tweets [<xref ref-type="bibr" rid="ref37">37</xref>] related to biomedical topics. Synthetic sentences, generated based on SNOMED CT descriptions or relations, were also used to address data scarcity [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>While some studies (8/37, 22%) used real-world clinical narrative records [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref52">52</xref>] or customized (ie, manually annotated by researchers) data [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref56">56</xref>] for testing their models, most of the studies (29/37, 78%) used publicly available datasets, especially when researchers were participating in shared task competitions or dealing with English texts. CADEC [<xref ref-type="bibr" rid="ref66">66</xref>] and PsySTAR [<xref ref-type="bibr" rid="ref67">67</xref>], open datasets built from drug review posts in which concept mentions were mapped to SNOMED CT concepts, were used for validating and testing concept normalization models [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. The Medical Concept Normalization (MCN) corpus, drawn from discharge summaries annotated using SNOMED CT and RxNorm concepts, was experimented on by concept normalization models [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. The WMT corpora, provided by the annual Conference on Machine Translation Shared Tasks, were used to test multilingual machine translation tasks by participating researchers [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Makhervaks et al [<xref ref-type="bibr" rid="ref53">53</xref>] and Chopra et al [<xref ref-type="bibr" rid="ref22">22</xref>] used sentence pairs in the MedNLI corpus [<xref ref-type="bibr" rid="ref68">68</xref>], annotated by medical doctors into 3 categories—contradictory, entailing, and neutral—for NL inference tasks. 
The MedMentions corpus [<xref ref-type="bibr" rid="ref69">69</xref>] identifies &gt;350,000 mentions from &gt;4000 PubMed abstracts, linking them to the UMLS concepts; it was used in the studies by Zotova et al [<xref ref-type="bibr" rid="ref40">40</xref>] and Dong et al [<xref ref-type="bibr" rid="ref54">54</xref>], in which SNOMED CT was loaded onto the UMLS. The ShARe/CLEF 2013 corpus [<xref ref-type="bibr" rid="ref70">70</xref>] consists of deidentified clinical notes annotated with disease mentions using the SNOMED CT subset of the UMLS; it was used for testing concept normalization tasks [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref54">54</xref>].</p>
      </sec>
      <sec>
        <title>SNOMED CT Content Integration Into NLP Pipelines</title>
        <sec>
          <title>Overview</title>
          <p>While the categorization methods by Pan et al [<xref ref-type="bibr" rid="ref7">7</xref>] pertained to the integration of LLMs with general-purpose KGs, we treated SNOMED CT as a specified form of KG. Their third category—KG-enhanced LLM interpretability—was omitted due to the lack of relevant studies in our review. In addition, we found no studies that fit into the subcategories “Integrating KGs into Training Objectives” (under “KG-enhanced LLM pretraining”) and “Dynamic Knowledge Fusion” (under “SNOMED CT–enhanced LLM inference”). The overarching categorization of all included methods is shown in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>.</p>
          <boxed-text id="box2" position="float">
            <title>Summarized categorizations of SNOMED CT–incorporated large language model (LLM) methods (duplicate counting of studies allowed).</title>
            <p>
              <bold>Category and subcategory</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>SNOMED CT–enhanced LLM pretraining</p>
                <list>
                  <list-item>
                    <p>Integrating SNOMED CT into LLM inputs (n=28, 76%)</p>
                  </list-item>
                  <list-item>
                    <p>Integrating SNOMED CT into additional fusion modules (n=5, 14%)</p>
                  </list-item>
                </list>
              </list-item>
              <list-item>
                <p>SNOMED CT–enhanced LLM inference</p>
                <list>
                  <list-item>
                    <p>Retrieval-augmented knowledge fusion (n=5, 14%)</p>
                  </list-item>
                </list>
              </list-item>
            </list>
          </boxed-text>
        </sec>
        <sec>
          <title>Integration of SNOMED CT Into LLM Inputs</title>
          <sec>
            <title>Overview</title>
            <p>Research in this area concentrated on developing new training objectives for LLMs that incorporate knowledge awareness. More specifically, this line of research aimed to incorporate relevant portions or subsets of SNOMED CT as additional input to LLMs during training. Because a disproportionately large number of included studies (28/37, 76%) fell into this category, we analyzed the methodology by two additional themes: (1) the content of SNOMED CT that was integrated into an LLM and (2) the part of the NLP pipeline into which the aforementioned content was incorporated. After qualitative analysis of the included articles and heuristic discussions among reviewers, we categorized the former theme into descriptions (including descriptions of synonyms), relations, and entity types (classes) and the latter theme into encoders and training data. SNOMED CT contents could be incorporated into LLM encoders either as embedding vectors or as annotations or tags when incorporated into the training corpus.</p>
            <p><xref ref-type="table" rid="table3">Table 3</xref> shows the distribution of models across SNOMED CT contents and NLP pipelines, allowing for duplicated counting of a single study if it adopted ≥2 methods.</p>
            <table-wrap position="float" id="table3">
              <label>Table 3</label>
              <caption>
                <p>Distributions of models across SNOMED CT contents and natural language processing (NLP) pipelines.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="380"/>
                <col width="290"/>
                <col width="330"/>
                <thead>
                  <tr valign="top">
                    <td>SNOMED CT content integrated into the NLP pipeline</td>
                    <td colspan="2">Part of the NLP pipeline where SNOMED CT contents were integrated into</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Encoder (as vector embedding)</td>
                    <td>Training corpora (as annotated text)</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Description</td>
                    <td>[<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]</td>
                    <td>[<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref47">47</xref>-<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]</td>
                  </tr>
                  <tr valign="top">
                    <td>Relation</td>
                    <td>[<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]</td>
                    <td>[<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]</td>
                  </tr>
                  <tr valign="top">
                    <td>Entity type (class)</td>
                    <td>—<sup>a</sup></td>
                    <td>[<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table3fn1">
                  <p><sup>a</sup>Not available.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Integration of SNOMED CT Descriptions</title>
            <p>Vector representations of SNOMED CT concept descriptions were created to facilitate seamless fusion into LLM encoders. The vectors for SNOMED CT description embeddings were used to calculate cosine similarity between the original mentions and SNOMED CT descriptions for concept normalization tasks [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref54">54</xref>].</p>
            <p>Instead of transforming text descriptions into vector embeddings, NL description texts were directly added to training corpora to expand the size of in-domain vocabulary (<xref rid="figure2" ref-type="fig">Figure 2</xref>). The description texts of synonyms were either concatenated in the training corpora before being input into an LLM for pretraining [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref57">57</xref>] or they replaced the original entity mentions in the text with standardized terms [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref48">48</xref>]. The descriptions of SNOMED CT codes were also prepended to the word sequences as classifier tokens for LLM pretraining [<xref ref-type="bibr" rid="ref23">23</xref>]. The multilingual feature of SNOMED CT descriptions was exploited to address the limited availability of training datasets in foreign languages by adding the translated SNOMED CT descriptions into the training corpora [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref50">50</xref>].</p>
            <fig id="figure2" position="float">
              <label>Figure 2</label>
              <caption>
                <p>Integrating SNOMED CT descriptions into large language models. CLS: classification; SYN: synonym.</p>
              </caption>
              <graphic xlink:href="medinform_v12i1e62924_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Integration of SNOMED CT Relations</title>
            <p>This line of research introduced relevant subgraph information of SNOMED CT, representing SNOMED CT relations as graph edges, into LLMs (<xref rid="figure3" ref-type="fig">Figure 3</xref>). Kalyan and Sangeetha [<xref ref-type="bibr" rid="ref31">31</xref>] encoded SNOMED CT concept descriptions to generate concept embedding vectors and learn representation vectors of concept mentions in the text, further improving the representations by retrofitting the target concept vectors with SNOMED CT synonym relations. CODER [<xref ref-type="bibr" rid="ref45">45</xref>] used KG embedding methods such as DistMult and ANALOGY [<xref ref-type="bibr" rid="ref71">71</xref>] to learn relational knowledge from SNOMED CT, enabling the quantification of term-relation-term similarity as well as term-term similarity.</p>
            <fig id="figure3" position="float">
              <label>Figure 3</label>
              <caption>
                <p>Integrating SNOMED CT relations into large language models. CLS: classification.</p>
              </caption>
              <graphic xlink:href="medinform_v12i1e62924_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
            <p>A different approach was taken to introduce textual relation triplets defined by SNOMED CT to expand the size of training corpora. Soto et al [<xref ref-type="bibr" rid="ref21">21</xref>] exploited the relations defined in SNOMED CT, such as <italic>is_a</italic> and <italic>occurs_in</italic>, to generate synthetic training corpora. Relations defined in SNOMED CT were also used to apply weak supervision to sentence pairs extracted from PubMed to establish contradiction labels in the dataset [<xref ref-type="bibr" rid="ref53">53</xref>]. Other authors exploited the existing mappings to other ontologies (eg, International Classification of Diseases-10 and UMLS) to enrich the training corpus with the description texts from the linked ontology concepts [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref52">52</xref>].</p>
          </sec>
          <sec>
            <title>Integration of SNOMED CT Entity Types</title>
            <p>The type of entities was incorporated into training corpora by distantly labeling the identified entities with SNOMED CT semantic tags (eg, diseases and chemicals; <xref rid="figure4" ref-type="fig">Figure 4</xref>) [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. In other studies, training corpora were annotated with SNOMED CT top-level hierarchies [<xref ref-type="bibr" rid="ref51">51</xref>] or subclasses of top-level hierarchies [<xref ref-type="bibr" rid="ref42">42</xref>] to label sentences per their respective tasks.</p>
            <fig id="figure4" position="float">
              <label>Figure 4</label>
              <caption>
                <p>Integrating SNOMED CT entity type information into large language models. CLS: classification.</p>
              </caption>
              <graphic xlink:href="medinform_v12i1e62924_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
        </sec>
        <sec>
          <title>Integration of SNOMED CT Into Additional Fusion Modules</title>
          <p>In this approach, concept information was processed separately before being concatenated and fused with the LLM embedding output (<xref rid="figure5" ref-type="fig">Figure 5</xref>). Authors created knowledge-directed embeddings using SNOMED CT graphs, where concepts were represented as nodes and relations as edges, and concatenated them with the LLM contextual embeddings. The merged representations of text and graph embeddings were then passed through a task-specific knowledge fusion module to achieve end tasks such as semantic similarity measurement [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref46">46</xref>], classification [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>], and question answering [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. To represent the graph information of SNOMED CT concepts, Chang et al [<xref ref-type="bibr" rid="ref36">36</xref>] used a graph convolutional network [<xref ref-type="bibr" rid="ref72">72</xref>] for encoding node features and edges. Chopra et al [<xref ref-type="bibr" rid="ref22">22</xref>] proposed the Bio-MTDDN model, which introduced the shortest path information between corresponding SNOMED CT concepts into knowledge-directed embeddings.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Integrating SNOMED CT into additional fusion modules.</p>
            </caption>
            <graphic xlink:href="medinform_v12i1e62924_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Retrieval-Augmented Knowledge Fusion</title>
          <p>In this approach, SNOMED CT was located outside LLMs as a fact-consulting knowledge base, injecting knowledge during inference (<xref rid="figure6" ref-type="fig">Figure 6</xref>). The module functioned as a gazetteer (dictionary), matching mentions in texts against the dictionary of SNOMED CT descriptions to filter out irrelevant entities from the models and map textual mentions to the most likely SNOMED CT concepts [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. These methods primarily concentrated on entity recognition and question answering, capturing both textual semantic meanings and up-to-date real-world knowledge.</p>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Retrieval-augmented knowledge fusion. LLM: large language model.</p>
            </caption>
            <graphic xlink:href="medinform_v12i1e62924_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>End Task and Performance Gain After SNOMED CT Integration</title>
        <sec>
          <title>Overview</title>
          <p>Most of the included studies (30/37, 81%) focused on NLU tasks, such as entity typing and classification. NLG tasks, including translation and summarization, were also attempted by a substantial number of studies (9/37, 24%), often involving various NLU pipelines before producing the final text output. Therefore, notably, works on NLU may also appear in the NLG category. Herein, we also compared the performance of models integrated with SNOMED CT to that of their counterparts without SNOMED CT integration.</p>
        </sec>
        <sec>
          <title>NLU Tasks</title>
          <sec>
            <title>Entity Extraction and Typing</title>
            <p>Entity typing or named entity recognition tasks aim to detect specific types of entities by identifying the spans of their mentions in the text. These can be regarded as multiclassification tasks, where the number of classes is arbitrarily chosen by researchers. To fine-tune LLMs for type classification, authors annotated entities in texts by matching domain gazetteer strings (eg, “BIO” tagging scheme) [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref49">49</xref>] or using off-the-shelf automatic concept extractors [<xref ref-type="bibr" rid="ref27">27</xref>]. The identified entities were then classified into human-annotated entity types [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>] or topmost nodes in the SNOMED CT hierarchies [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. In addition to typing individual entities, extraction and typing of relations between 2 entities were also attempted to align the detected entities with FHIR resources [<xref ref-type="bibr" rid="ref25">25</xref>], such as protein to chemical and gene to disease [<xref ref-type="bibr" rid="ref46">46</xref>] as well as disease to afflicted family members [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
            <p>Many researchers did not conduct a comparative performance analysis of their SNOMED CT–integrated models against out-of-domain vanilla models. Among the few researchers who reported such comparisons, Jha and Zhang [<xref ref-type="bibr" rid="ref46">46</xref>] demonstrated a gain in the <italic>F</italic><sub>1</sub>-score after the integration of SNOMED CT, while Montañés-Salas et al [<xref ref-type="bibr" rid="ref37">37</xref>] found a positive impact only on recall (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
            <table-wrap position="float" id="table4">
              <label>Table 4</label>
              <caption>
                <p>Percentage performance gain in biomedical entity typing tasks after SNOMED CT integration into large language models.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="260"/>
                <col width="220"/>
                <col width="190"/>
                <col width="190"/>
                <col width="140"/>
                <thead>
                  <tr valign="bottom">
                    <td>Studies</td>
                    <td><italic>F</italic><sub>1</sub>-score gain (%)</td>
                    <td>Precision gain (%)</td>
                    <td>Recall gain (%)</td>
                    <td>AUC<sup>a</sup> gain (%)</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Montañés-Salas et al [<xref ref-type="bibr" rid="ref37">37</xref>] (Best 2 model)</td>
                    <td>−0.11 (0.899→0.898)</td>
                    <td>−7.97 (0.928→0.854)</td>
                    <td>+8.60 (0.872→0.947)</td>
                    <td>—<sup>b</sup></td>
                  </tr>
                  <tr valign="top">
                    <td>Jha and Zhang [<xref ref-type="bibr" rid="ref46">46</xref>] (PubMedBERT on BC2GM)</td>
                    <td>+4.08 (0.80982→0.84287)</td>
                    <td>—</td>
                    <td>—</td>
                    <td>—</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table4fn1">
                  <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
                </fn>
                <fn id="table4fn2">
                  <p><sup>b</sup>Not available.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Classification</title>
            <p>We defined classification tasks as occurring at the sentence or document level, rather than at the word, entity, or phrase level. When classification tasks were implemented, semantic similarity [<xref ref-type="bibr" rid="ref36">36</xref>] or the conditional probability of a positive case [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref53">53</xref>] was calculated, and the case was categorized as positive if the probability exceeded a threshold. Binary classification was performed to determine whether a sentence pair was entailed [<xref ref-type="bibr" rid="ref33">33</xref>], contradictory [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref53">53</xref>], or similar [<xref ref-type="bibr" rid="ref36">36</xref>]. Multilabel classification was conducted to categorize utterances by clinical encounter components, such as symptoms, complaints, and medications [<xref ref-type="bibr" rid="ref27">27</xref>]; social determinants of health [<xref ref-type="bibr" rid="ref42">42</xref>]; or narrators’ intent [<xref ref-type="bibr" rid="ref48">48</xref>].</p>
            <p><xref ref-type="table" rid="table5">Table 5</xref> shows the percentage performance gain after SNOMED CT integration in classification tasks. While Yadav et al [<xref ref-type="bibr" rid="ref33">33</xref>] and Zhang et al [<xref ref-type="bibr" rid="ref48">48</xref>] estimated the performance of their models based on the <italic>F</italic><sub>1</sub>-score, precision, and recall, Khosla et al [<xref ref-type="bibr" rid="ref27">27</xref>] and Makhervaks et al [<xref ref-type="bibr" rid="ref53">53</xref>] measured performance in terms of the area under the receiver operating characteristic curve, which improved by 0.85% to 14.83% after the integration of SNOMED CT. Chang et al [<xref ref-type="bibr" rid="ref36">36</xref>] reported the Pearson correlation to assess clinical semantic textual similarity, and the incorporation of SNOMED CT into ClinicalBERT improved the performance of the model by 1.77% and 2.36% using cui2vec [<xref ref-type="bibr" rid="ref73">73</xref>] and KG embeddings, respectively.</p>
            <table-wrap position="float" id="table5">
              <label>Table 5</label>
              <caption>
                <p>Percentage performance gain in classification tasks after SNOMED CT integration into large language models.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="30"/>
                <col width="150"/>
                <col width="0"/>
                <col width="170"/>
                <col width="0"/>
                <col width="170"/>
                <col width="0"/>
                <col width="170"/>
                <col width="0"/>
                <col width="150"/>
                <col width="0"/>
                <col width="160"/>
                <thead>
                  <tr valign="bottom">
                    <td colspan="3">Studies</td>
                    <td colspan="2"><italic>F</italic><sub>1</sub>-score gain (%)</td>
                    <td colspan="2">Precision gain (%)</td>
                    <td colspan="2">Recall gain (%)</td>
                    <td colspan="2">AUC<sup>a</sup> gain (%)</td>
                    <td>Accuracy gain (%)</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td colspan="3">
                      <bold>Chopra et al [<xref ref-type="bibr" rid="ref22">22</xref>]</bold>
                    </td>
                    <td colspan="2">—<sup>b</sup></td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td>+0.99</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="3">
                      <bold>Yadav et al [<xref ref-type="bibr" rid="ref33">33</xref>]</bold>
                    </td>
                    <td colspan="2">+26.05 (0.4718→0.5947)</td>
                    <td colspan="2">+36.87 (0.4616→0.6318)</td>
                    <td colspan="2">+16.41 (0.4826→0.5618)</td>
                    <td colspan="2">—</td>
                    <td>+17.27 (0.4790→0.5617)</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="3">
                      <bold>Khosla et al [<xref ref-type="bibr" rid="ref27">27</xref>]</bold>
                    </td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">+0.85 (0.468→0.472)</td>
                    <td>—</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="12">
                      <bold>Zhang et al [<xref ref-type="bibr" rid="ref48">48</xref>]</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>BioBERT for intent detection</td>
                    <td colspan="2">+1.15 (0.693→0.701)</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Semantic matching for content recognition</td>
                    <td colspan="2">—</td>
                    <td colspan="2">−0.90 (1.000→0.991)</td>
                    <td colspan="2">+12.15 (0.724→0.812)</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="12">
                      <bold>Makhervaks et al [<xref ref-type="bibr" rid="ref53">53</xref>]</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>BERT based on MedNLI-General</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">+14.83 (0.661→0.759)</td>
                    <td colspan="2">—</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Bio-GPT on MedNLI-General</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">—</td>
                    <td colspan="2">+10.34 (0.725→0.800)</td>
                    <td colspan="2">—</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table5fn1">
                  <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
                </fn>
                <fn id="table5fn2">
                  <p><sup>b</sup>Not available.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>MCN Tasks</title>
            <p>The most prominent end task in NLU was MCN, with 15 studies involved. MCN, the task of linking textual mentions to concepts in an ontology, provides a solution for unifying different ways of referring to the same concept. All the studies approached concept recognition as a multilabel classification task involving entity extraction and entity typing from words, phrases, or sentences. Models were trained on corpora annotated with SNOMED CT concepts and semantic types to identify concept mentions and generate a list of candidate SNOMED CT concepts that best match those mentions from testing texts. When training from annotated corpora was not available, MetaMap [<xref ref-type="bibr" rid="ref74">74</xref>] was used to extract biomedical entities mentioned in free texts and map them to ontology concepts [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref50">50</xref>]. When candidate concepts were ranked, representation vectors of mentions and concept descriptions were generated, and their similarity was calculated using cosine similarity [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref54">54</xref>], linear transformation such as support vector classifiers [<xref ref-type="bibr" rid="ref52">52</xref>], or softmax function [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. In a more rule-oriented approach, Borchert and Schapranow [<xref ref-type="bibr" rid="ref47">47</xref>] calculated weights based on semantic type and preferred term status from a gazetteer to reorder candidate lists. 
In other studies [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref50">50</xref>], sieve-based multipass entity linking systems [<xref ref-type="bibr" rid="ref75">75</xref>] were used to rank the most likely concepts and achieved superior performance compared to neural classifiers.</p>
            <p>Most of the studies observed positive gains in accuracy in MCN tasks after SNOMED CT integration (<xref ref-type="table" rid="table6">Table 6</xref>). Two authors reported the pre- and postintegration <italic>F</italic><sub>1</sub>-scores, recall values, and precision values and observed inconsistent results, with one reporting positive gains in the <italic>F</italic><sub>1</sub>-score and precision value and the other demonstrating a loss in the <italic>F</italic><sub>1</sub>-score and precision value after the integration of SNOMED CT.</p>
            <table-wrap position="float" id="table6">
              <label>Table 6</label>
              <caption>
                <p>Percentage performance gain in medical concept normalization tasks after SNOMED CT integration into large language models.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="280"/>
                <col width="180"/>
                <col width="180"/>
                <col width="180"/>
                <col width="180"/>
                <thead>
                  <tr valign="top">
                    <td>Studies</td>
                    <td><italic>F</italic><sub>1</sub>-score gain (%)</td>
                    <td>Precision gain (%)</td>
                    <td>Recall gain (%)</td>
                    <td>Accuracy gain (%)</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Peterson et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td>
                    <td>−1.05 (0.95→0.94)</td>
                    <td>−1.04 (0.96→0.95)</td>
                    <td>0 (0.94→0.94)</td>
                    <td>—<sup>a</sup></td>
                  </tr>
                  <tr valign="top">
                    <td>Wang et al [<xref ref-type="bibr" rid="ref26">26</xref>] (vs training data dictionary with exact match, ignore order “yes”)<sup>b</sup></td>
                    <td>—</td>
                    <td>—</td>
                    <td>—</td>
                    <td>+27.36 (0.6013→0.7658)</td>
                  </tr>
                  <tr valign="top">
                    <td>Hristov et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td>
                    <td>—</td>
                    <td>—</td>
                    <td>—</td>
                    <td>+73.21 (0.56→0.97)</td>
                  </tr>
                  <tr valign="top">
                    <td>Dai et al (2021) [<xref ref-type="bibr" rid="ref35">35</xref>]</td>
                    <td>—</td>
                    <td>—</td>
                    <td>—</td>
                    <td>+45.08 (0.417→0.605)</td>
                  </tr>
                  <tr valign="top">
                    <td>Xu and Miller [<xref ref-type="bibr" rid="ref44">44</xref>] (on ShARe/CLEF 2013)</td>
                    <td>—</td>
                    <td>—</td>
                    <td>—</td>
                    <td>+0.68 (0.8277→0.8333)</td>
                  </tr>
                  <tr valign="top">
                    <td>Dong et al [<xref ref-type="bibr" rid="ref54">54</xref>] (BLINKout on ShARe/CLEF 2013)</td>
                    <td>+5.87 (0.818→0.866)</td>
                    <td>+15.11 (0.741→0.853)</td>
                    <td>−3.62 (0.912→0.879)</td>
                    <td>+10.68 (0.777→0.860)</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table6fn1">
                  <p><sup>a</sup>Not available.</p>
                </fn>
                <fn id="table6fn2">
                  <p><sup>b</sup>The training data dictionary was constructed based on the Medical Concept Normalization corpus data. The SNOMED CT dictionary included the RxNorm dictionary.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
        </sec>
        <sec>
          <title>NLG Tasks</title>
          <sec>
            <title>Machine Translation</title>
            <p>Several studies that participated in the WMT Biomedical Shared Task [<xref ref-type="bibr" rid="ref76">76</xref>] described their methods for translating biomedical texts from various foreign languages, such as Spanish, French, German, and Chinese, as well as less-resourced languages, such as Basque, into English or vice versa. Transformer-based multilingual neural machine translation systems were the mainstream architectures, which were trained on dictionaries derived from SNOMED CT [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref39">39</xref>] or clinical notes artificially generated from SNOMED CT terminology contents [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</p>
            <p>The translation performance was reported using the Bilingual Evaluation Understudy (BLEU) score [<xref ref-type="bibr" rid="ref77">77</xref>]. While most studies (4/5, 80%) presented improved BLEU scores by up to 131.66% [<xref ref-type="bibr" rid="ref21">21</xref>] compared to their out-of-domain models, some studies (1/5, 20%) reported nonsuperior results [<xref ref-type="bibr" rid="ref30">30</xref>] (<xref ref-type="table" rid="table7">Table 7</xref>).</p>
            <table-wrap position="float" id="table7">
              <label>Table 7</label>
              <caption>
                <p>Performance comparison of biomedical translation tasks with and without SNOMED CT integration into large language models (LLMs).</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="30"/>
                <col width="300"/>
                <col width="0"/>
                <col width="230"/>
                <col width="0"/>
                <col width="230"/>
                <col width="0"/>
                <col width="210"/>
                <thead>
                  <tr valign="top">
                    <td colspan="3">Studies and translation direction</td>
                    <td colspan="2">Performance on test data without SNOMED CT integration into an LLM (BLEU<sup>a</sup> score)</td>
                    <td colspan="2">Performance on test data with SNOMED CT integration into an LLM (BLEU score)</td>
                    <td>BLEU score gain after SNOMED CT integration into an LLM (%)</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td colspan="8">
                      <bold>Soto et al [<xref ref-type="bibr" rid="ref21">21</xref>]</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Basque to Spanish</td>
                    <td colspan="2">10.55</td>
                    <td colspan="2">24.44</td>
                    <td colspan="2">+131.66</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="8">
                      <bold>Soto et al [<xref ref-type="bibr" rid="ref30">30</xref>]</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Spanish to English</td>
                    <td colspan="2">57.25</td>
                    <td colspan="2">56.89</td>
                    <td colspan="2">−0.63</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>English to Spanish</td>
                    <td colspan="2">47.19</td>
                    <td colspan="2">47.15</td>
                    <td colspan="2">−0.08</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="8">
                      <bold>Corral and Saralegi [<xref ref-type="bibr" rid="ref29">29</xref>]</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>English to Basque</td>
                    <td colspan="2">12.85</td>
                    <td colspan="2">13.61</td>
                    <td colspan="2">+5.91</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="8">
                      <bold>Peng et al [<xref ref-type="bibr" rid="ref28">28</xref>]</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>English to French</td>
                    <td colspan="2">38.98</td>
                    <td colspan="2">41.66</td>
                    <td colspan="2">+6.88</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>French to English</td>
                    <td colspan="2">38.31</td>
                    <td colspan="2">38.44</td>
                    <td colspan="2">+0.34</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="8">
                      <bold>Wang et al [<xref ref-type="bibr" rid="ref39">39</xref>]</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>English to Italian</td>
                    <td colspan="2">33.53</td>
                    <td colspan="2">42.17</td>
                    <td colspan="2">+25.77</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Italian to English</td>
                    <td colspan="2">36.43</td>
                    <td colspan="2">43.72</td>
                    <td colspan="2">+20.01</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>English to Portuguese</td>
                    <td colspan="2">38.73</td>
                    <td colspan="2">50.12</td>
                    <td colspan="2">+29.41</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Portuguese to English</td>
                    <td colspan="2">41.84</td>
                    <td colspan="2">54.74</td>
                    <td colspan="2">+30.83</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>English to Russian</td>
                    <td colspan="2">25.25</td>
                    <td colspan="2">36.25</td>
                    <td colspan="2">+43.56</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Russian to English</td>
                    <td colspan="2">39.76</td>
                    <td colspan="2">47.09</td>
                    <td colspan="2">+18.44</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table7fn1">
                  <p><sup>a</sup>BLEU: Bilingual Evaluation Understudy.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Text Summarization</title>
            <p>For medical text summarization, encoder-decoder LLMs were used to process input embeddings and produce simplified texts. Pattisapu et al [<xref ref-type="bibr" rid="ref32">32</xref>] primarily focused on the simplification of verbose sentences. They substituted biomedical mentions with UMLS-preferred names and tokenized them at the subword level to produce noisy input sentences for training. In contrast, Searle et al [<xref ref-type="bibr" rid="ref57">57</xref>] summarized entire hospital encounters into a few sentences by ranking the most salient ones to constitute the summary. To address the hallucination problem arising from LLMs, authors used SNOMED CT semantic tags of the extracted biomedical terms to configure guidance signals for clinical problems and interventions.</p>
            <p>Recall-Oriented Understudy for Gisting Evaluation (ROUGE) recall [<xref ref-type="bibr" rid="ref78">78</xref>] measures how many n-grams in the source text appear in the summarization. Pattisapu et al [<xref ref-type="bibr" rid="ref32">32</xref>] reported no gain in ROUGE recall when incorporating SNOMED CT into NLP pipelines. Searle et al [<xref ref-type="bibr" rid="ref57">57</xref>] presented ROUGE-<italic>F</italic><sub>1</sub>, a harmonized measure of the recall and precision for ROUGE, and observed improvements by 3.6% (from 11.1 to 11.5) and 48.84% (from 8.6 to 12.8) on the Medical Information Mart for Intensive Care III and King’s College Hospital corpora, respectively, after incorporating SNOMED CT.</p>
          </sec>
          <sec>
            <title>Question Answering and Generation</title>
            <p>Generating answers for short-answer or essay questions, as opposed to multiple-choice questions, can be classified as NLG. The task of question answering may involve preliminary NLU pipelines, such as intent and content recognition. Zhang et al [<xref ref-type="bibr" rid="ref48">48</xref>] developed a clinical communication training dialogue system incorporated with SNOMED CT synonyms for the augmentation of textual data and BioBERT for intent recognition. They qualitatively evaluated the performance of the conversation system using scales rated by physicians from 29 training records, which indicated precision comparable to that of clinical experts.</p>
          </sec>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>LLMs and SNOMED CT</title>
        <p>In this scoping review, we observed that BERT was the mainstream LLM integrated with SNOMED CT. Considering the significant time required to publish state-of-the-art methodologies, especially in peer-reviewed journals [<xref ref-type="bibr" rid="ref79">79</xref>], it is unsurprising that more recent inventions, such as GPT-3.5 and BART, were less prevalent in articles published from 2018 to 2023. Researchers in this field exploited biomedically oriented BERT variants, such as BioBERT and PubMedBERT, reflecting the need for biomedical tasks to be trained or fine-tuned on specialized corpora [<xref ref-type="bibr" rid="ref16">16</xref>]. However, due to privacy and confidentiality concerns, there is a dearth of clinical documents and patient notes, making it difficult to sufficiently train biomedical LLMs to an extent comparable to those in the general domain [<xref ref-type="bibr" rid="ref80">80</xref>]. SNOMED CT can supplement or even substitute biomedical pretraining corpora, addressing the chronic shortage, as noted in this review. A substantial number of studies included in this review used SNOMED CT to expand pretraining corpora by concatenating synonyms or relations in documents or generating synthetic texts based on SNOMED CT descriptions or relations.</p>
        <p>We identified 3 approaches to incorporating SNOMED CT into LLMs: LLM input, additional fusion modules, and knowledge retriever, with the former 2 intervening in the pretraining process of LLMs. While either lexical or graph information from SNOMED CT could be incorporated into the pretraining stage, the lexicon of SNOMED CT descriptions was the predominant form of integration. This underscores that SNOMED CT chiefly introduces synonym information to LLMs, yet relation information remains underused in NLP research. The advantage of SNOMED CT in defining relations between biomedical entities through semantic networks needs to be adopted for more sophisticated tasks such as knowledge inference and validation and highlighted within the biomedical NLP research community.</p>
      </sec>
      <sec>
        <title>End Tasks and Performance Reports</title>
        <p>A significant number of studies included in this review engaged in the concept recognition process from free texts, whether as the final task or an intermediate step for subsequent tasks. Recognizing and extracting SNOMED CT concepts from the unstructured sections of EHRs is becoming crucial in clinical settings, where substantial patient information, such as social history and socioeconomic status, remains untapped in free-text clinical notes [<xref ref-type="bibr" rid="ref81">81</xref>]. Leveraging previously unrepresented SNOMED CT concepts from free-text clinical data holds great potential in significantly enhancing clinical care and research, especially in the era of smart applications where patient-generated data can be integrated into EHRs through the representation of patient-authored texts with SNOMED CT concepts [<xref ref-type="bibr" rid="ref82">82</xref>].</p>
        <p>Only a small fraction of the included models disclosed performance comparisons before and after SNOMED CT integration. For example, only 6 (40%) out of 15 studies on MCN tasks provided information about the gain in the <italic>F</italic><sub>1</sub>-scores or accuracy after SNOMED CT incorporation. This suggests that many biomedical NLP researchers do not focus on the role of SNOMED CT or other ontologies in improving their models. Moreover, some authors chose to demonstrate only selected metrics, potentially leading to publication bias that favors improved performance at first glance. In our review, we identified 7 studies that presented only 1 metric without disclosing others (excluding those that reported only the BLEU score, which is widely recognized as the best metric for measuring translation performance). This focus on a single metric may encourage researchers to optimize their models for that metric, potentially leading to underperformance in other areas. The NLP community needs to propose standardized methods for presenting performance and, if possible, develop new metrics that better reflect the specifics of NLU and NLG tasks performed by LLMs.</p>
      </sec>
      <sec>
        <title>Implications for Future Endeavors</title>
        <p>The knowledge-intensive approaches to enhancing LMs, which are often renounced by those favoring deep learning–based approaches, still comprise a small portion of the artificial intelligence research community. However, in the face of immense computational power and the availability of data required by LLMs and deep learning–based systems, an increasing number of researchers now advocate the harmonization of the 2 approaches [<xref ref-type="bibr" rid="ref83">83</xref>], and a plethora of KG-enhanced LLMs has been developed in the general domain [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref84">84</xref>]. In addition to improving the performance of artificial intelligence models, ontologies and human-curated knowledge bases can address the explainability and controllability of artificial intelligence, probing facts within the human-interpretable form of system architectures [<xref ref-type="bibr" rid="ref85">85</xref>]. Exploring the trade-offs in combining the 2 approaches is anticipated to contribute toward trustworthy and reliable artificial intelligence.</p>
        <p>Among various biomedical terminology systems and ontologies, SNOMED CT was the primary focus in this review as a KG integrated with LLMs. Although the UMLS continues to dominate NLP research in the biomedical domain [<xref ref-type="bibr" rid="ref16">16</xref>], SNOMED CT has the potential to expand its influence, given its governance over the health care industry. Consequently, the use of SNOMED CT as a reliable knowledge source becomes more feasible, considering its presence in various EHR systems or common data models. While this review did not identify real-world SNOMED CT–incorporated LLM applications directly tied to EHR systems, SNOMED CT is implicitly expected to support these systems as a standardized terminology system bound to syntactic interoperability structures such as FHIR and openEHR. In addition, medical institutions already implementing SNOMED CT in their EHR systems are anticipated to incorporate LLM applications and use SNOMED CT at the point of care [<xref ref-type="bibr" rid="ref86">86</xref>]. Explicit descriptions of SNOMED CT in technical specifications or scientific papers by developers of these applications would have been valuable to include in this review.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>One of the limitations of this scoping review is that we examined LLMs that accepted SNOMED CT only as a working ontology, leaving other biomedical ontologies out of our scope. To the best of our knowledge, however, there is no comprehensive review of the use of other biomedical ontologies within LLMs. The queries used in this review, especially the first one, retrieved articles that used a variety of biomedical ontologies, such as the UMLS, Medical Subject Headings, Gene Ontology, and Medical Wikidata. We chose to limit the scope of our review to SNOMED CT due to the heterogeneity of components among different ontology systems and the difficulty in delineating the contributions of each ontology in a standardized way. A more consolidated analysis of different ontologies used within LLMs awaits more comprehensive work.</p>
        <p>A significant proportion of the included studies (23/37, 62%) were retrieved from conference proceedings. While we excluded short abstract articles and included only those that provided sufficient information to be categorized by our preset features, interested readers might find it challenging to delve into detailed methodologies from these proceedings articles. However, many of these papers refer to additional materials, such as GitHub (GitHub, Inc) repositories, to provide raw data and source codes; for example, Khosla et al [<xref ref-type="bibr" rid="ref27">27</xref>] provided the source code of their system on GitHub [<xref ref-type="bibr" rid="ref87">87</xref>]. We encourage more studies to share additional materials on open developer platforms to enhance methodology transparency and accelerate NLP research.</p>
        <p>Another limitation of this review is that we could not conclude on how the integration of SNOMED CT improved the performance of LLMs. While most of the studies (14/18, 78%) observed a positive impact on performance after SNOMED CT integration, their statistical significance was not indicated. Moreover, the diversity of evaluation methods prevented us from performing a meta-analysis across all the included studies. While we examined whether SNOMED CT integration improved LLM performance by presenting percentage gains across various metrics, these results are prone to being misleading due to potential publication bias and the insufficient number of included studies. Nevertheless, this before-and-after comparison method, often adopted for comparative studies, effectively measures the effect of interventions (SNOMED CT in our case) within a single group or entity [<xref ref-type="bibr" rid="ref88">88</xref>]. To control for confounding factors, we excluded models whose performance differences could be attributable to modalities other than SNOMED CT integration. For example, we excluded the study by Zotova et al [<xref ref-type="bibr" rid="ref40">40</xref>] from our analysis because their performance might have been affected by the use of a different testing corpus. An evenhanded testing bed, such as a shared task competition under a single testing method requiring all participants to report performance differences before and after KG integration, could provide a controlled evaluation to reliably and objectively measure the contributions of KGs.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In conclusion, this scoping review explored the methodologies and effectiveness of integrating SNOMED CT into LLMs. The predominant approach involved using SNOMED CT concept descriptions or graph embeddings as inputs for LM encoders, many of which were involved in MCN tasks. The endeavor to identify and extract SNOMED CT concepts from free texts was proven to be instrumental in enhancing the understanding and generation of NL texts for downstream tasks in the biomedical realm. However, our study revealed both a lack of standardized methods for assessing KG integration into LLMs and a scarcity of explicit performance reporting in existing research, highlighting significant gaps in current evaluation practices. These findings underline the need for more consistent reporting and evaluation practices in this field of research. Future research is anticipated to be more aware of the advantage of SNOMED CT when incorporating it into LLMs and to report findings in a manner that facilitates comparison across different works.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) checklist.</p>
        <media xlink:href="medinform_v12i1e62924_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 134 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Brief introduction to large language models.</p>
        <media xlink:href="medinform_v12i1e62924_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 412 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Summary of the included studies.</p>
        <media xlink:href="medinform_v12i1e62924_app3.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 61 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BLEU</term>
          <def>
            <p>Bilingual Evaluation Understudy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FHIR</term>
          <def>
            <p>Fast Healthcare Interoperability Resources</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">KG</term>
          <def>
            <p>knowledge graph</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LM</term>
          <def>
            <p>language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MCN</term>
          <def>
            <p>Medical Concept Normalization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NL</term>
          <def>
            <p>natural language</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NLG</term>
          <def>
            <p>natural language generation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NLU</term>
          <def>
            <p>natural language understanding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PRISMA-ScR</term>
          <def>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Research Foundation of Korea grant funded by the Republic of Korea government (Ministry of Science and Information and Communication Technology; RS-2024-00354718).</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data analyzed during this study are available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on October 11, 2018</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1810.04805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ryder</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Subbiah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dhariwal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Neelakantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shyam</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sastry</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Askell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Herbert-Voss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Henighan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Child</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ramesh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ziegler</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Winter</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sigler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Litwin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chess</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McCandlish</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Language models are few-shot learners</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 28, 2020</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2005.14165</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>DictBERT: dictionary description knowledge enhanced language model pre-training via contrastive learning</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on August 1, 2022</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2208.00635</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Sachan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Adapters for enhanced modeling of multilingual knowledge and text</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on October 24, 2022</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2210.13617</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Frieske</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ishii</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Madotto</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Survey of hallucination in natural language generation</article-title>
          <source>ACM Comput Surv</source>
          <year>2023</year>
          <month>03</month>
          <day>03</day>
          <volume>55</volume>
          <issue>12</issue>
          <fpage>1</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1145/3571730</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>A survey on hallucination in large language models: principles, taxonomy, challenges, and open questions</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on November 9, 2023</comment>
          <pub-id pub-id-type="doi">10.48550/arxiv.2311.05232</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Unifying large language models and knowledge graphs: a roadmap</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2024</year>
          <month>7</month>
          <volume>36</volume>
          <issue>7</issue>
          <fpage>3580</fpage>
          <lpage>99</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2024.3352100</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Schuurmans</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bosma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ichter</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Chain-of-thought prompting elicits reasoning in large language models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on January 28, 2022</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2201.11903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Piktus</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Petroni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Karpukhin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Küttler</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yih</surname>
              <given-names>WT</given-names>
            </name>
            <name name-style="western">
              <surname>Rocktäschel</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Riedel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kiela</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Retrieval-augmented generation for knowledge-intensive NLP tasks</article-title>
          <source>Proceedings of the 34th International Conference on Neural Information Processing Systems</source>
          <year>2020</year>
          <conf-name>NIPS'20</conf-name>
          <conf-date>December 6-12, 2020</conf-date>
          <conf-loc>Vancouver, BC</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A survey of knowledge enhanced pre-trained language models</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2024</year>
          <month>4</month>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1413</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2023.3310002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lawrence</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Knowledge graphs + large language models = the ability for users to ask their own questions?</article-title>
          <source>Medium</source>
          <year>2023</year>
          <month>03</month>
          <day>31</day>
          <access-date>2023-12-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medium.com/@peter.lawrence_47665/knowledge-graphs-large-language-models-the-ability-for-users-to-ask-their-own-questions-e4afc348fa72">https://medium.com/@peter.lawrence_47665/knowledge-graphs-large-language-models-the-ability-for-users-to-ask-their-own-questions-e4afc348fa72</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anand</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ramesh</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CY</given-names>
            </name>
          </person-group>
          <article-title>MultiModal language modelling on knowledge graphs for deep video understanding</article-title>
          <source>Proceedings of the 29th ACM International Conference on Multimedia</source>
          <year>2021</year>
          <conf-name>MM '21</conf-name>
          <conf-date>October 20-24, 2021</conf-date>
          <conf-loc>Virtual Event, China</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3474085.3479220</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fellbaum</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <source>WordNet: An Electronic Lexical Database</source>
          <year>1998</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>Database issue</issue>
          <fpage>D267</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Tiwari</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Pre-trained language models in biomedical domain: a systematic survey</article-title>
          <source>ACM Comput Surv</source>
          <year>2023</year>
          <month>10</month>
          <day>05</day>
          <volume>56</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1145/3611651</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mostafa</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The use of SNOMED CT, 2013-2020: a literature review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>08</month>
          <day>13</day>
          <volume>28</volume>
          <issue>9</issue>
          <fpage>2017</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34151978"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab084</pub-id>
          <pub-id pub-id-type="medline">34151978</pub-id>
          <pub-id pub-id-type="pii">6307174</pub-id>
          <pub-id pub-id-type="pmcid">PMC8363812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Posnack</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Barker</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>The heat is on: US caught FHIR in 2019</article-title>
          <source>Health IT Buzz</source>
          <year>2021</year>
          <month>7</month>
          <day>29</day>
          <access-date>2023-12-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthit.gov/buzz-blog/health-it/the-heat-is-on-us-caught-fhir-in-2019">https://www.healthit.gov/buzz-blog/health-it/the-heat-is-on-us-caught-fhir-in-2019</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tricco</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Lillie</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zarin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Colquhoun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Levac</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Moher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Horsley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weeks</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hempel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Akl</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McGowan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hartling</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Aldcroft</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Garritty</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Godfrey</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Macdonald</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Langlois</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Soares-Weiser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moriarty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tunçalp</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>Straus</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>PRISMA extension for scoping reviews (PRISMA-ScR): checklist and explanation</article-title>
          <source>Ann Intern Med</source>
          <year>2018</year>
          <month>10</month>
          <day>02</day>
          <volume>169</volume>
          <issue>7</issue>
          <fpage>467</fpage>
          <lpage>73</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.acpjournals.org/doi/abs/10.7326/M18-0850?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.7326/M18-0850</pub-id>
          <pub-id pub-id-type="medline">30178033</pub-id>
          <pub-id pub-id-type="pii">2700389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Min</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sulem</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Veyseh</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Sainz</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Agirre</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Heintz</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Recent advances in natural language processing via large pre-trained language models: a survey</article-title>
          <source>ACM Comput Surv</source>
          <year>2023</year>
          <month>09</month>
          <day>14</day>
          <volume>56</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1145/3605943</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soto</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Perez-de-Vinaspre</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Oronoz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Labaka</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Leveraging SNOMED CT terms and relations for machine translation of clinical texts from Basque to Spanish</article-title>
          <source>Proceedings of the Second Workshop on Multilingualism at the Intersection of Knowledge Bases and Machine Translation</source>
          <year>2019</year>
          <conf-name>MomenT@MTSummit 2019</conf-name>
          <conf-date>August 19-23, 2019</conf-date>
          <conf-loc>Dublin, Ireland</conf-loc>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chopra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kaushik</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>MSIT_SRIB at MEDIQA 2019: knowledge directed multi-task framework for natural language inference in clinical domain</article-title>
          <source>Proceedings of the 18th BioNLP Workshop and Shared Task</source>
          <year>2019</year>
          <conf-name>BioNLP@ACL 2019</conf-name>
          <conf-date>August 1, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w19-5052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Rawat</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Fine-tuning bidirectional encoder representations from transformers (BERT)-based models on large-scale electronic health record notes: an empirical study</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>09</month>
          <day>12</day>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>e14830</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/3/e14830/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14830</pub-id>
          <pub-id pub-id-type="medline">31516126</pub-id>
          <pub-id pub-id-type="pii">v7i3e14830</pub-id>
          <pub-id pub-id-type="pmcid">PMC6746103</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gopale</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Begoli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bethard</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Unified medical language system resources improve sieve-based generation and bidirectional encoder representations from transformers (BERT)-based ranking for concept normalization</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>10</month>
          <day>01</day>
          <volume>27</volume>
          <issue>10</issue>
          <fpage>1510</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32719838"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa080</pub-id>
          <pub-id pub-id-type="medline">32719838</pub-id>
          <pub-id pub-id-type="pii">5876963</pub-id>
          <pub-id pub-id-type="pmcid">PMC7566510</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A corpus-driven standardization framework for encoding clinical problems with HL7 FHIR</article-title>
          <source>J Biomed Inform</source>
          <year>2020</year>
          <month>10</month>
          <volume>110</volume>
          <fpage>103541</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(20)30169-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2020.103541</pub-id>
          <pub-id pub-id-type="medline">32814201</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(20)30169-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7701983</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hur</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Verspoor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Baldwin</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>A multi-pass sieve for clinical concept normalization</article-title>
          <source>Traitement Automatique Des Langues</source>
          <year>2020</year>
          <volume>61</volume>
          <issue>2</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://findanexpert.unimelb.edu.au/scholarlywork/1542720-a-multi-pass-sieve-for-clinical-concept-normalization"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khosla</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vashishth</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>MedFilter: improving extraction of task-relevant utterances through integration of discourse structure and ontological knowledge</article-title>
          <source>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2020</year>
          <conf-name>EMNLP 2020</conf-name>
          <conf-date>November 16-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.626</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Huawei’s submissions to the WMT20 biomedical translation task</article-title>
          <source>Proceedings of the Fifth Conference on Machine Translation</source>
          <year>2020</year>
          <conf-name>WMT@EMNLP 2020</conf-name>
          <conf-date>November 19-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corral</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Saralegi</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Elhuyar submission to the biomedical translation task 2020 on terminology and abstracts translation</article-title>
          <source>Proceedings of the Fifth Conference on Machine Translation</source>
          <year>2020</year>
          <conf-name>WMT@EMNLP 2020</conf-name>
          <conf-date>November 19-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soto</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Perez-de-Vinaspre</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Labaka</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Oronoz</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Ixamed’s submission description for WMT20 Biomedical shared task: benefits and limitations of using terminologies for domain adaptation</article-title>
          <source>Proceedings of the Fifth Conference on Machine Translation</source>
          <year>2020</year>
          <conf-name>WMT@EMNLP 2020</conf-name>
          <conf-date>November 19-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kalyan</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Sangeetha</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Target concept guided medical concept normalization in noisy user-generated texts</article-title>
          <source>Proceedings of Deep Learning Inside Out (DeeLIO): The First Workshop on Knowledge Extraction and Integration for Deep Learning Architectures</source>
          <year>2020</year>
          <conf-name>DeeLIO 2020</conf-name>
          <conf-date>November 19-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.deelio-1.8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pattisapu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Prabhu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bhati</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Varma</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Leveraging social media for medical text simplification</article-title>
          <source>Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
          <year>2020</year>
          <conf-name>SIGIR '20</conf-name>
          <conf-date>July 25-30, 2020</conf-date>
          <conf-loc>Virtual Event</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3397271.3401105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yadav</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pallagani</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Medical knowledge-enriched textual entailment framework</article-title>
          <source>Proceedings of the 28th International Conference on Computational Linguistics</source>
          <year>2020</year>
          <conf-name>COLING 2020</conf-name>
          <conf-date>December 8-13, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.coling-main.161</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hristov</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tahchiev</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Papazov</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tulechki</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Primov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Boytcheva</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Application of deep learning methods to SNOMED CT encoding of clinical texts: from data collection to extreme multi-label text-based classification</article-title>
          <source>Proceedings of the International Conference on Recent Advances in Natural Language Processing</source>
          <year>2021</year>
          <conf-name>RANLP 2021</conf-name>
          <conf-date>September 1-3, 2021</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.26615/978-954-452-072-4_063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Rybinski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Karimi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>SearchEHR: a family history search system for clinical decision support</article-title>
          <source>Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management</source>
          <year>2021</year>
          <conf-name>CIKM '21</conf-name>
          <conf-date>November 1-5, 2021</conf-date>
          <conf-loc>Virtual Event</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3459637.3481986</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Brandt</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>Incorporating domain knowledge into language models by using graph convolutional networks for assessing semantic textual similarity: model development and performance comparison</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>11</month>
          <day>26</day>
          <volume>9</volume>
          <issue>11</issue>
          <fpage>e23101</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/11/e23101/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23101</pub-id>
          <pub-id pub-id-type="medline">34842531</pub-id>
          <pub-id pub-id-type="pii">v9i11e23101</pub-id>
          <pub-id pub-id-type="pmcid">PMC8665398</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Montañés-Salas</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>López-Bosque</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>García-Garcés</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>del-Hoyo-Alonso</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ITAINNOVA at SocialDisNER: a transformers cocktail for disease identification in social media in Spanish</article-title>
          <source>Proceedings of the 29th International Conference on Computational Linguistics</source>
          <year>2022</year>
          <conf-name>COLING 2022</conf-name>
          <conf-date>October 12-17, 2022</conf-date>
          <conf-loc>Gyeongju, South Korea</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ying</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Label refinement via contrastive learning for distantly-supervised named entity recognition</article-title>
          <source>Proceedings of the Annual Conference of the North American Chapter of the Association for Computational Linguistics</source>
          <year>2022</year>
          <conf-name>NAACL 2022</conf-name>
          <conf-date>July 10-15, 2022</conf-date>
          <conf-loc>Seattle, WA</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2022.findings-naacl.203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Huawei BabelTar NMT at WMT22 biomedical translation task: how we further improve domain-specific NMT</article-title>
          <source>Proceedings of the Seventh Conference on Machine Translation</source>
          <year>2022</year>
          <conf-name>WMT 2022</conf-name>
          <conf-date>December 7-8, 2022</conf-date>
          <conf-loc>Abu Dhabi, United Arab Emirates</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zotova</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cuadros</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rigau</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>ClinIDMap: towards a clinical IDs mapping for data interoperability</article-title>
          <source>Proceedings of the Thirteenth Language Resources and Evaluation Conference</source>
          <year>2022</year>
          <conf-name>LREC 2022</conf-name>
          <conf-date>June 20-25, 2022</conf-date>
          <conf-loc>Marseille, France</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Standardization of clinical terminology based on hybrid recall and Ernie</article-title>
          <source>Proceedings of the 3rd International Symposium on Artificial Intelligence for Medicine Sciences</source>
          <year>2022</year>
          <conf-name>ISAIMS '22</conf-name>
          <conf-date>October 13-15, 2022</conf-date>
          <conf-loc>Amsterdam, The Netherlands</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3570773.3570782</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Richie</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Quan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Brent</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tsui</surname>
              <given-names>FR</given-names>
            </name>
          </person-group>
          <article-title>Classifying social determinants of health from unstructured electronic health records using deep learning-based natural language processing</article-title>
          <source>J Biomed Inform</source>
          <year>2022</year>
          <month>03</month>
          <volume>127</volume>
          <fpage>103984</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(21)00313-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103984</pub-id>
          <pub-id pub-id-type="medline">35007754</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(21)00313-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Automatic SNOMED CT coding of Chinese clinical terms via attention-based semantic matching</article-title>
          <source>Int J Med Inform</source>
          <year>2022</year>
          <month>03</month>
          <volume>159</volume>
          <fpage>104676</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2021.104676</pub-id>
          <pub-id pub-id-type="medline">34990940</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(21)00302-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>A simple neural vector space model for medical concept normalization using concept embeddings</article-title>
          <source>J Biomed Inform</source>
          <year>2022</year>
          <month>06</month>
          <volume>130</volume>
          <fpage>104080</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(22)00096-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2022.104080</pub-id>
          <pub-id pub-id-type="medline">35472514</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(22)00096-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC9351985</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>CODER: knowledge-infused cross-lingual medical term embedding for term normalization</article-title>
          <source>J Biomed Inform</source>
          <year>2022</year>
          <month>02</month>
          <volume>126</volume>
          <fpage>103983</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(21)00312-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103983</pub-id>
          <pub-id pub-id-type="medline">34990838</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(21)00312-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jha</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Continual knowledge infusion into pre-trained biomedical language models</article-title>
          <source>Bioinformatics</source>
          <year>2022</year>
          <month>01</month>
          <day>03</day>
          <volume>38</volume>
          <issue>2</issue>
          <fpage>494</fpage>
          <lpage>502</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btab671</pub-id>
          <pub-id pub-id-type="medline">34554186</pub-id>
          <pub-id pub-id-type="pii">6374496</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Borchert</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schapranow</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>HPI-DHC @ BioASQ DisTEMIST: Spanish biomedical entity linking with pre-trained transformers and cross-lingual candidate retrieval</article-title>
          <source>Proceedings of the Conference and Labs of the Evaluation Forum</source>
          <year>2022</year>
          <conf-name>CLEF 2022</conf-name>
          <conf-date>September 5-8, 2022</conf-date>
          <conf-loc>Bologna, Italy</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>BX</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wing-Yiu Ng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chia</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Hang-Kwong So</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Kai-Lam Cheung</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Conversational system for clinical communication training supporting user-defined tasks</article-title>
          <source>Proceedings of the IEEE International Conference on Teaching, Assessment and Learning for Engineering</source>
          <year>2022</year>
          <conf-name>TALE 2022</conf-name>
          <conf-date>December 4-7, 2022</conf-date>
          <conf-loc>Hung Hom, Hong Kong</conf-loc>
          <pub-id pub-id-type="doi">10.1109/tale54877.2022.00071</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morine</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Priami</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Coronado</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Haber</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kaput</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A comprehensive and holistic health database</article-title>
          <source>Proceedings of the IEEE International Conference on Digital Health</source>
          <year>2022</year>
          <conf-name>ICDH 2022</conf-name>
          <conf-date>July 10-16, 2022</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icdh55609.2022.00039</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Stacking-BERT model for Chinese medical procedure entity normalization</article-title>
          <source>Math Biosci Eng</source>
          <year>2023</year>
          <month>01</month>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>1018</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aimspress.com/article/10.3934/mbe.2023047"/>
          </comment>
          <pub-id pub-id-type="doi">10.3934/mbe.2023047</pub-id>
          <pub-id pub-id-type="medline">36650800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Llorca</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Borchert</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schapranow</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>A meta-dataset of German medical corpora: harmonization of annotations and cross-corpus NER evaluation</article-title>
          <source>Proceedings of the 5th Clinical Natural Language Processing Workshop</source>
          <year>2023</year>
          <conf-name>ClinicalNLP@ACL 2023</conf-name>
          <conf-date>July 14, 2023</conf-date>
          <conf-loc>Toronto, ON</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2023.clinicalnlp-1.23</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hristov</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ivanov</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Aksenova</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Asamov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gyurov</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Primov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Boytcheva</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Clinical text classification to SNOMED CT codes using transformers trained on linked open medical ontologies</article-title>
          <source>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</source>
          <year>2023</year>
          <conf-name>RANLP 2023</conf-name>
          <conf-date>September 4-6, 2023</conf-date>
          <conf-loc>Varna, Bulgaria</conf-loc>
          <pub-id pub-id-type="doi">10.26615/978-954-452-092-2_057</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Makhervaks</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gillis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Radinsky</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Clinical contradiction detection</article-title>
          <source>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2023</year>
          <conf-name>EMNLP 2023</conf-name>
          <conf-date>December 6-10, 2023</conf-date>
          <conf-loc>Singapore, Singapore</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2023.emnlp-main.80</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Horrocks</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Reveal the unknown: out-of-knowledge-base mention discovery with entity linking</article-title>
          <source>Proceedings of the 32nd ACM International Conference on Information and Knowledge Management</source>
          <year>2023</year>
          <conf-name>CIKM '23</conf-name>
          <conf-date>October 21-25, 2023</conf-date>
          <conf-loc>Birmingham, UK</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3583780.3615036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mazwi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AE</given-names>
            </name>
          </person-group>
          <article-title>AnnoDash, a clinical terminology annotation dashboard</article-title>
          <source>JAMIA Open</source>
          <year>2023</year>
          <month>07</month>
          <day>08</day>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>ooad046</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37425489"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooad046</pub-id>
          <pub-id pub-id-type="medline">37425489</pub-id>
          <pub-id pub-id-type="pii">ooad046</pub-id>
          <pub-id pub-id-type="pmcid">PMC10329488</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Automatic knowledge extraction from Chinese electronic medical records and rheumatoid arthritis knowledge graph construction</article-title>
          <source>Quant Imaging Med Surg</source>
          <year>2023</year>
          <month>06</month>
          <day>01</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>3873</fpage>
          <lpage>90</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37284084"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/qims-22-1158</pub-id>
          <pub-id pub-id-type="medline">37284084</pub-id>
          <pub-id pub-id-type="pii">qims-13-06-3873</pub-id>
          <pub-id pub-id-type="pmcid">PMC10240026</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Searle</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Teo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Discharge summary hospital course summarisation of in patient electronic health record text with clinical concept guided deep pre-trained transformer models</article-title>
          <source>J Biomed Inform</source>
          <year>2023</year>
          <month>05</month>
          <volume>141</volume>
          <fpage>104358</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(23)00079-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2023.104358</pub-id>
          <pub-id pub-id-type="medline">37023846</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(23)00079-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A robustly optimized BERT pre-training approach with post-training</article-title>
          <source>Proceedings of the 20th China National Conference on Chinese Computational Linguistics</source>
          <year>2021</year>
          <conf-name>CCL 2021</conf-name>
          <conf-date>August 13-15, 2021</conf-date>
          <conf-loc>Hohhot, China</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-84186-7_31</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gimpel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Soricut</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ALBERT: a lite BERT for self-supervised learning of language representations</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on September 26, 2019</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1909.11942</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tinn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Usuyama</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Domain-specific language model pretraining for biomedical natural language processing</article-title>
          <source>ACM Trans Comput Healthc</source>
          <year>2021</year>
          <month>10</month>
          <day>15</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1145/3458754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Boag</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Jindi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Publicly available clinical BERT embeddings</article-title>
          <source>Proceedings of the 2nd Clinical Natural Language Processing Workshop</source>
          <year>2019</year>
          <conf-name>ClinicalNLP 2019</conf-name>
          <conf-date>June 7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w19-1909</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Shareghi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Basaldella</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Collier</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Self-alignment pretraining for biomedical entity representations</article-title>
          <source>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2021</year>
          <conf-name>NAACL-HLT 2021</conf-name>
          <conf-date>June 6-11, 2021</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2021.naacl-main.334</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bressem</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Papaioannou</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Grundmann</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Borchert</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Busch</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Loyen</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Niehues</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Augustin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grosser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Makowski</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Aerts</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Löser</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>medBERT.de: a comprehensive German BERT model for the medical domain</article-title>
          <source>Expert Syst Appl</source>
          <year>2024</year>
          <month>03</month>
          <day>01</day>
          <volume>237</volume>
          <fpage>121598</fpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2023.121598</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>TY</given-names>
            </name>
          </person-group>
          <article-title>BioGPT: generative pre-trained transformer for biomedical text generation and mining</article-title>
          <source>Brief Bioinform</source>
          <year>2022</year>
          <month>11</month>
          <day>19</day>
          <volume>23</volume>
          <issue>6</issue>
          <fpage>bbac409</fpage>
          <pub-id pub-id-type="doi">10.1093/bib/bbac409</pub-id>
          <pub-id pub-id-type="medline">36156661</pub-id>
          <pub-id pub-id-type="pii">6713511</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ghazvininejad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mohamed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension</article-title>
          <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2020</year>
          <conf-name>ACL 2020</conf-name>
          <conf-date>July 5-10, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karimi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Metke-Jimenez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kemp</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Cadec: a corpus of adverse drug event annotations</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>06</month>
          <volume>55</volume>
          <fpage>73</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00053-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.03.010</pub-id>
          <pub-id pub-id-type="medline">25817970</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00053-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zolnoori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Patrick</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Fontelo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kharrazi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Faiola</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Shirley Wu</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Eldredge</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moayyed</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The PsyTAR dataset: from patients generated narratives to a corpus of adverse drug events and effectiveness of psychiatric medications</article-title>
          <source>Data Brief</source>
          <year>2019</year>
          <month>03</month>
          <day>15</day>
          <volume>24</volume>
          <fpage>103838</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2352-3409(19)30189-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.dib.2019.103838</pub-id>
          <pub-id pub-id-type="medline">31065579</pub-id>
          <pub-id pub-id-type="pii">S2352-3409(19)30189-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC6495095</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Romanov</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shivade</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Lessons from natural language inference in the clinical domain</article-title>
          <source>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2018</year>
          <conf-name>EMNLP 2018</conf-name>
          <conf-date>October 31-November 4, 2018</conf-date>
          <conf-loc>Brussels, Belgium</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d18-1187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mohan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>MedMentions: a large biomedical corpus annotated with UMLS concepts</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on February 25, 2019</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1902.09476</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suominen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salanterä</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Mowery</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Leveling</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Goeuriot</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zuccon</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Overview of the ShARe/CLEF eHealth evaluation lab 2013</article-title>
          <source>Proceedings of the 4th International Conference of the CLEF Initiative on Information Access Evaluation. Multilinguality, Multimodality, and Visualization</source>
          <year>2013</year>
          <conf-name>CLEF 2013</conf-name>
          <conf-date>September 23-26, 2013</conf-date>
          <conf-loc>Valencia, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-642-40802-1_24</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Analogical inference for multi-relational embeddings</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 6, 2017</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1705.02426</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kipf</surname>
              <given-names>TN</given-names>
            </name>
            <name name-style="western">
              <surname>Welling</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised classification with graph convolutional networks</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on September 9, 2016</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1609.02907</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beam</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Kompa</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Schmaltz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fried</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Palmer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>IS</given-names>
            </name>
          </person-group>
          <article-title>Clinical concept embeddings learned from massive sources of multimodal medical data</article-title>
          <source>Biocomputing</source>
          <year>2019</year>
          <fpage>295</fpage>
          <lpage>306</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.worldscientific.com/doi/abs/10.1142/9789811215636_0027"/>
          </comment>
          <pub-id pub-id-type="doi">10.1142/9789811215636_0027</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Effective mapping of biomedical text to the UMLS Metathesaurus: the MetaMap program</article-title>
          <source>Proc AMIA Symp</source>
          <year>2001</year>
          <fpage>17</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/11825149"/>
          </comment>
          <pub-id pub-id-type="medline">11825149</pub-id>
          <pub-id pub-id-type="pii">D010001275</pub-id>
          <pub-id pub-id-type="pmcid">PMC2243666</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>D’Souza</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Sieve-based entity linking for the biomedical domain</article-title>
          <source>Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing</source>
          <year>2015</year>
          <conf-name>ACL 2015</conf-name>
          <conf-date>July 26-31, 2015</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/p15-2049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barrault</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Biesialska</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bojar</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Costa-jussà</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Federmann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Graham</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Grundkiewicz</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Haddow</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Huck</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Joanis</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kocmi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Koehn</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Ljubešić</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Monz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Morishita</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nagata</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nakazawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Pal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Post</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zampieri</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Findings of the 2020 conference on machine translation (WMT20)</article-title>
          <source>Proceedings of the Fifth Conference on Machine Translation</source>
          <year>2020</year>
          <conf-name>WMT 2020</conf-name>
          <conf-date>November 19-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.wmt-1.1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Papineni</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Roukos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>WJ</given-names>
            </name>
          </person-group>
          <article-title>BLEU: a method for automatic evaluation of machine translation</article-title>
          <source>Proceedings of the 40th Annual Meeting on Association for Computational Linguistics</source>
          <year>2002</year>
          <conf-name>ACL '02</conf-name>
          <conf-date>July 7-12, 2002</conf-date>
          <conf-loc>Philadelphia, PA</conf-loc>
          <pub-id pub-id-type="doi">10.3115/1073083.1073135</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Och</surname>
              <given-names>FJ</given-names>
            </name>
          </person-group>
          <article-title>Automatic evaluation of machine translation quality using longest common subsequence and skip-bigram statistics</article-title>
          <source>Proceedings of the 42nd Annual Meeting on Association for Computational Linguistics</source>
          <year>2004</year>
          <conf-name>ACL '04</conf-name>
          <conf-date>July 21-26, 2004</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.3115/1218955.1219032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Björk</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Solomon</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The publishing delay in scholarly peer-reviewed journals</article-title>
          <source>J Informetr</source>
          <year>2013</year>
          <month>10</month>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>914</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1016/j.joi.2013.09.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Clinical text data in machine learning: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>03</month>
          <day>31</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>e17984</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/3/e17984/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17984</pub-id>
          <pub-id pub-id-type="medline">32229465</pub-id>
          <pub-id pub-id-type="pii">v8i3e17984</pub-id>
          <pub-id pub-id-type="pmcid">PMC7157505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jonnagaddala</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liaw</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Ray</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>NW</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Coronary artery disease risk assessment from unstructured electronic health records using text mining</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58 Suppl</volume>
          <issue>Suppl</issue>
          <fpage>S203</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00170-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.08.003</pub-id>
          <pub-id pub-id-type="medline">26319542</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00170-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4985289</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sezgin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hussain</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Rust</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Extracting medical information from free-text and unstructured patient-generated health data using natural language processing methods: feasibility study with real-world data</article-title>
          <source>JMIR Form Res</source>
          <year>2023</year>
          <month>03</month>
          <day>07</day>
          <volume>7</volume>
          <fpage>e43014</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2023/1/e43014/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/43014</pub-id>
          <pub-id pub-id-type="medline">36881467</pub-id>
          <pub-id pub-id-type="pii">v7i1e43014</pub-id>
          <pub-id pub-id-type="pmcid">PMC10031450</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Humm</surname>
              <given-names>BG</given-names>
            </name>
            <name name-style="western">
              <surname>Archer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bense</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bernier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Goetz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hoppe</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Schumann</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wenning</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zender</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>New directions for applied knowledge-based AI and machine learning</article-title>
          <source>Informatik Spektrum</source>
          <year>2022</year>
          <month>12</month>
          <day>30</day>
          <volume>46</volume>
          <issue>2</issue>
          <fpage>65</fpage>
          <lpage>78</lpage>
          <pub-id pub-id-type="doi">10.1007/s00287-022-01513-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Give us the facts: enhancing large language models with knowledge graphs for fact-aware language modeling</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2024</year>
          <month>7</month>
          <volume>36</volume>
          <issue>7</issue>
          <fpage>3091</fpage>
          <lpage>110</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2024.3360454</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Confalonieri</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Del Prado</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Agramunt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Malagarriga</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Faggion</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Weyde</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Besold</surname>
              <given-names>TR</given-names>
            </name>
          </person-group>
          <article-title>An ontology-based approach to explaining artificial neural networks</article-title>
          <source>Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases</source>
          <year>2019</year>
          <conf-name>ECML PKDD 2019</conf-name>
          <conf-date>September 16-20, 2019</conf-date>
          <conf-loc>Würzburg, Germany</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Farfán Sedano</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Terrón Cuadrado</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>García Rebolledo</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Castellanos Clemente</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Serrano Balazote</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gómez Delgado</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Implementation of SNOMED CT to the medicines database of a general hospital</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2009</year>
          <volume>148</volume>
          <fpage>123</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="medline">19745242</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="web">
          <article-title>sopankhosla / MedFilter</article-title>
          <source>GitHub</source>
          <access-date>2024-06-04</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/sopankhosla/MedFilter">https://github.com/sopankhosla/MedFilter</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sterne</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Hernán</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>McAleenan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Reeves</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Higgins</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Higgins</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chandler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cumpston</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Welch</surname>
              <given-names>VA</given-names>
            </name>
          </person-group>
          <article-title>Chapter 25: assessing risk of bias in a non-randomized study</article-title>
          <source>Cochrane Handbook for Systematic Reviews of Interventions Version 6.5</source>
          <year>2024</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>The Cochrane Collaboration</publisher-name>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
