<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v13i1e68704</article-id>
      <article-id pub-id-type="pmid">40203304</article-id>
      <article-id pub-id-type="doi">10.2196/68704</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Improving Phenotyping of Patients With Immune-Mediated Inflammatory Diseases Through Automated Processing of Discharge Summaries: Multicenter Cohort Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Goldman</surname>
            <given-names>Jean-Philippe</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gaudet-Blavignac</surname>
            <given-names>Christophe</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lagarde</surname>
            <given-names>Elagarde</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chowdhury</surname>
            <given-names>Shaika</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Oworah</surname>
            <given-names>Sunday</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Remaki</surname>
            <given-names>Adam</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Limics</institution>
            <institution>Université Sorbonne Paris-Nord, Inserm</institution>
            <institution>Sorbonne Université</institution>
            <addr-line>15 Rue de l'École de Médecine</addr-line>
            <addr-line>Paris, 75006</addr-line>
            <country>France</country>
            <phone>33 783049678</phone>
            <email>ad.remaki@gmail.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8902-8207</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Ung</surname>
            <given-names>Jacques</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-1174-4729</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Pages</surname>
            <given-names>Pierre</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-3013-9271</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Wajsburt</surname>
            <given-names>Perceval</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9746-9993</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Elise</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2253-6407</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Faure</surname>
            <given-names>Guillaume</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-1948-5863</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Petit-Jean</surname>
            <given-names>Thomas</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4433-442X</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Tannier</surname>
            <given-names>Xavier</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2452-8868</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Gérardin</surname>
            <given-names>Christel</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9303-6349</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Limics</institution>
        <institution>Université Sorbonne Paris-Nord, Inserm</institution>
        <institution>Sorbonne Université</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Pôle Innovation et Données</institution>
        <institution>Direction des Services Numériques</institution>
        <institution>Assistance Publique – Hôpitaux de Paris</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Centre de Pharmacoépidémiologie</institution>
        <institution>Hôpital Pitié Salpêtrière</institution>
        <institution>Assistance Publique – Hôpitaux de Paris</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Service de médecine interne</institution>
        <institution>Hôpital Tenon</institution>
        <institution>Assistance Publique – Hôpitaux de Paris</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Adam Remaki <email>ad.remaki@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>9</day>
        <month>4</month>
        <year>2025</year>
      </pub-date>
      <volume>13</volume>
      <elocation-id>e68704</elocation-id>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>11</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>25</day>
          <month>12</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>21</day>
          <month>1</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>25</day>
          <month>1</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Adam Remaki, Jacques Ung, Pierre Pages, Perceval Wajsburt, Elise Liu, Guillaume Faure, Thomas Petit-Jean, Xavier Tannier, Christel Gérardin. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 09.04.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2025/1/e68704" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Valuable insights gathered by clinicians during their inquiries and documented in textual reports are often unavailable in the structured data recorded in electronic health records (EHRs).</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to highlight that mining unstructured textual data with natural language processing techniques complements the available structured data and enables more comprehensive patient phenotyping. A proof-of-concept for patients diagnosed with specific autoimmune diseases is presented, in which the extraction of information on laboratory tests and drug treatments is performed.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We collected EHRs available in the clinical data warehouse of the Greater Paris University Hospitals from 2012 to 2021 for patients hospitalized and diagnosed with 1 of 4 immune-mediated inflammatory diseases: systemic lupus erythematosus, systemic sclerosis, antiphospholipid syndrome, and Takayasu arteritis. Then, we built, trained, and validated natural language processing algorithms on 103 discharge summaries selected from the cohort and annotated by a clinician. Finally, all discharge summaries in the cohort were processed with the algorithms, and the extracted data on laboratory tests and drug treatments were compared with the structured data.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Named entity recognition followed by normalization yielded <italic>F</italic><sub>1</sub>-scores of 71.1 (95% CI 63.6-77.8) for the laboratory tests and 89.3 (95% CI 85.9-91.6) for the drugs. Application of the algorithms to 18,604 EHRs increased the detection of antibody results and drug treatments. For instance, among patients in the systemic lupus erythematosus cohort with positive antinuclear antibodies, the rate increased from 18.34% (752/4102) to 71.87% (2949/4102), making the results more consistent with the literature.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>While challenges remain in standardizing laboratory tests, particularly with abbreviations, this work, based on secondary use of clinical data, demonstrates that automated processing of discharge summaries enriched the information available in structured data and facilitated more comprehensive patient profiling.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>secondary use of clinical data for research and surveillance</kwd>
        <kwd>clinical informatics</kwd>
        <kwd>clinical data warehouse</kwd>
        <kwd>electronic health record</kwd>
        <kwd>data science</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
        <kwd>natural language processing</kwd>
        <kwd>ontologies</kwd>
        <kwd>classifications</kwd>
        <kwd>coding</kwd>
        <kwd>tools</kwd>
        <kwd>programs and algorithms</kwd>
        <kwd>immune-mediated inflammatory diseases</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Since the 2010s, the widespread adoption of electronic health records (EHRs) and health data warehouses has enabled the development and application of new algorithms for patient phenotyping, which corresponds to the extraction of a set of observable patient characteristics, including laboratory test results, symptoms, diseases, and past or current treatments [<xref ref-type="bibr" rid="ref1">1</xref>]. The automated extraction of these characteristics from large-scale databases supports predictive risk assessments, preselection for therapeutic trials, and pharmacovigilance analyses [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>EHR data is typically categorized into 2 types: structured data and unstructured data. Structured data refers to directly queryable numerical values, such as laboratory test results or <italic>International Classification of Diseases, Tenth Revision</italic> (<italic>ICD</italic>-<italic>10</italic>) codes, while unstructured data encompasses raw clinical texts and medical imaging. Structured data from clinical warehouses is often incomplete, capturing only intrahospital records and excluding extrahospital information. For instance, a patient’s blood test conducted at an external laboratory before hospitalization might not be included. In addition, historical biological results in clinical databases are often limited to a few years. This is particularly problematic for conditions like autoimmune diseases, where historical immunologic results critical to the initial diagnosis are often documented only in clinical text rather than in structured data. Similarly, details about prior treatments are usually found only in textual records. Valuable information that is not present in structured data is often found in observations recorded in the discharge summaries [<xref ref-type="bibr" rid="ref5">5</xref>]. The application of automated text analysis to this unstructured text, in conjunction with structured data, has already demonstrated increased effectiveness in predicting patients’ clinical courses [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>Transforming unstructured data into structured formats involves multiple natural language processing (NLP) tasks. In this research, we primarily concentrate on named entity recognition (NER) and normalization, which are fundamental for extracting meaningful information from large volumes of unstructured clinical text.</p>
        <p>NER refers to locating and classifying terms into predefined categories, such as drug name, laboratory test, or medical disorder. Traditional NER methods often depend on dictionary-based term-matching techniques, which require meticulously maintained lexical resources [<xref ref-type="bibr" rid="ref12">12</xref>]. However, maintaining these resources can be both labor-intensive and error-prone. A more effective method treats NER as a sequence-labeling task using tagging systems like the beginning, inside, outside, unit, and last scheme, which is widely recognized in biomedical NER for its ease of implementation and efficiency [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Sequence labeling models, particularly conditional random fields [<xref ref-type="bibr" rid="ref15">15</xref>], have been extensively used for NER. When combined with transformer-based architectures like bidirectional encoder representations from transformers (BERT), these models have set state-of-the-art performance benchmarks for NER in clinical and biomedical text analyses [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        <p>Following NER, the normalization process assigns standard codes (unique identifiers that correspond to concepts within established medical terminologies) to the detected terms. For example, standard codes, such as concept unique identifiers (CUIs) from the Unified Medical Language System (UMLS) [<xref ref-type="bibr" rid="ref20">20</xref>], can be used to map detected entities like drugs or laboratory tests to their corresponding concepts. Common normalization strategies often rely on exact or approximate string matching against predefined dictionaries. Tools, such as KnowledgeMap Concept Identifier [<xref ref-type="bibr" rid="ref21">21</xref>], MetaMap [<xref ref-type="bibr" rid="ref22">22</xref>], MedLEE [<xref ref-type="bibr" rid="ref23">23</xref>], MedEx [<xref ref-type="bibr" rid="ref24">24</xref>], HITEx [<xref ref-type="bibr" rid="ref25">25</xref>], and cTAKES [<xref ref-type="bibr" rid="ref26">26</xref>], have been widely adopted in phenotyping models [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. The emergence of deep contextual embeddings, notably BERT [<xref ref-type="bibr" rid="ref30">30</xref>], has revolutionized NLP methodologies, including normalization tasks. Current state-of-the-art approaches heavily use transformer-based encoders pretrained on domain-specific corpora, demonstrating substantial improvements in normalization [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        <p>Although large language models like GPT-4 [<xref ref-type="bibr" rid="ref34">34</xref>] hold promise for biomedical applications, their current performance in tasks like NER and normalization remains limited [<xref ref-type="bibr" rid="ref35">35</xref>]. Moreover, implementing these models at scale to extract phenotypes from large volumes of clinical documents poses considerable cost challenges.</p>
      </sec>
      <sec>
        <title>Goal of the Study</title>
        <p>The aim of the study was to provide a proof-of-concept for end-to-end patient phenotyping from their EHRs. Patient phenotyping refers to the process of characterizing patients based on their clinical features, such as clinical diagnoses, laboratory results, or drug treatments. Secondary uses of EHRs require the application of various processes to transform the data into meaningful variables. In this research, we focused specifically on leveraging discharge summaries (written in French) through NLP techniques to enrich the information contained in the structured data. We restricted our study to patients hospitalized for one of the following immune-mediated inflammatory diseases: systemic lupus erythematosus (SLE), systemic sclerosis, antiphospholipid syndrome (APS), and Takayasu arteritis (TA). As we analyzed autoimmune diseases, we also restricted phenotyping to the analysis of autoantibodies (laboratory tests) and immunosuppressive therapies (drugs), which are central to the management of these diseases. As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, laboratory tests and drug therapies were extracted from both structured and unstructured data. Then, to analyze the data jointly, a standard concept code was assigned to each laboratory test using the Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT; US edition) [<xref ref-type="bibr" rid="ref36">36</xref>] and to each drug using the Anatomical Therapeutic Chemical (ATC) classification [<xref ref-type="bibr" rid="ref37">37</xref>]. Our hypothesis was that incorporating the results of laboratory tests and drug treatments recorded in patients’ discharge summaries would complement the information available in structured data and enable more in-depth, interoperable phenotyping of patients, while remaining reliable.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of the end-to-end patient phenotyping pipeline. Structured and unstructured data are extracted from electronic health records, enabling large information retrieval, refining cohort selection, and facilitating more robust patient comparisons. ATC: Anatomical Therapeutic Chemical; SNOMED: Systematized Nomenclature of Medicine Clinical Terms.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68704_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Selected Diseases</title>
        <p>As a proof of concept, we focused on 4 immune-mediated inflammatory diseases: SLE, systemic sclerosis, APS, and TA.</p>
        <p>SLE is an autoimmune disease that mainly affects the skin, joints, and kidneys [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. According to the revised 2019 EULAR/ACR classification criteria for SLE, patients are eligible for SLE criteria only if they have a positive antinuclear antibody ≥1/80 at least once. Anti-dsDNA and anti-Smith autoantibodies with high specificity for SLE are also included in the classification criteria for SLE. Therefore, we have chosen these 3 antibodies to identify SLE patients. Hydroxychloroquine, glucocorticoids, mycophenolate mofetil, cyclophosphamide, and belimumab are key treatments of SLE [<xref ref-type="bibr" rid="ref40">40</xref>] and have been chosen to identify patients with SLE.</p>
        <p>Systemic sclerosis is a rare autoimmune disease, inducing skin fibrosis, digestive disorders, such as gastroesophageal reflux disease and chronic pseudoocclusive syndrome, interstitial lung involvement, and sometimes inaugural renal crisis. Classification criteria are also based on specific autoantibodies, including anti-Scl-70, anticentromere, and anti-RNA polymerase III, which we have chosen to analyze here [<xref ref-type="bibr" rid="ref41">41</xref>]. Therapeutic management is also based on glucocorticoids and immunosuppressive drugs, such as mycophenolate mofetil.</p>
        <p>APS is a systemic autoimmune disease defined by thrombosis or pregnancy morbidity in the presence of persistent antiphospholipid autoantibodies: lupus anticoagulant, IgG or IgM anticardiolipin antibodies, or IgG or IgM anti-β2glycoprotein-1 antibodies. Treatment is based on curative anticoagulation with heparin, low-molecular-weight heparin, and antivitamin K [<xref ref-type="bibr" rid="ref42">42</xref>].</p>
        <p>TA is an inflammatory disease of the large arteries, leading to arterial stenosis in young people. C-reactive protein is used as an indicator of inflammation and disease activity in TA. Treatment is based on immunosuppressive therapies, such as glucocorticoids, and biologics, such as methotrexate or tocilizumab. Therefore, we have chosen to focus our analysis on these treatments.</p>
        <p>Finally, in the context of the immunosuppressive treatments proposed, patients are at greater risk of infection; therefore, vaccination, particularly against pneumococcal and influenza infections, is recommended. Hence, we also looked for this information in the texts.</p>
      </sec>
      <sec>
        <title>Dataset Selection</title>
        <p>The dataset used in this study comes from the clinical data warehouse (CDW) of the University Hospitals of Greater Paris (Assistance publique-hôpitaux de Paris; AP-HP). The CDW brings together information on all patients followed in the 39 teaching hospitals in the Paris region (&#62;22,000 beds and 1.5 million hospitalizations per year) that use a common EHR software, ORBIS Dedalus Health care. This software has been gradually implemented in the 39 hospitals since 2012.</p>
        <p>The dataset was extracted from the CDW research database, in the integrating biology and the bedside format [<xref ref-type="bibr" rid="ref43">43</xref>]. The inclusion criteria for the study were as follows: all patients aged &#62;15 years with SLE, systemic sclerosis, APS, or TA who had at least one stay at AP-HP hospitals initially from July 1, 2017, to December 31, 2020. Patients in the database were selected in 2 ways: by the <italic>ICD-10</italic> codes of these 4 pathologies and by keywords present in the medical reports (using regular expression matching), as summarized in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref51">51</xref>]. For these patients, the data available were demographic data; textual data, including all full-text medical reports, laboratory tests performed during patients’ stay, drug prescription, and administration when available; and medico-administrative coding data (<italic>ICD-10</italic>). The extraction covered all medical departments that could potentially manage patients with the 4 pathologies of interest: internal medicine and clinical immunology, nephrology, rheumatology, dermatology, pneumology, neurology, gastroenterology, oncology, hematology, infectious diseases, and emergency and intensive care.</p>
        <p>As this study involves the secondary use of real-life health data, from this large integrating biology and bedside extraction, we limited the study to EHRs with at least one <italic>ICD-10</italic> code corresponding to the diseases studied (SLE, systemic sclerosis, APS, or TA) and at least one recorded hospital discharge summary, as these are validated by a senior clinician. Subsequently, a subset of this study cohort of 103 hospital discharge summaries, each corresponding to a different patient, was randomly selected and annotated by a clinician (CG), following the same annotation rules as proposed by the national NLP clinical challenges 2022 [<xref ref-type="bibr" rid="ref44">44</xref>]. Details regarding this annotation process are provided in the Annotation Guidelines section of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The global approach of this work was to build, train, and validate NLP algorithms on the annotated subset before applying it to the full study cohort. <xref rid="figure2" ref-type="fig">Figure 2</xref> presents the cohort selection process.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Cohort selection flowchart. Starting with integrating biology and bedside extraction of 1,947,870 stays (38,384 patients), the cohort was filtered to include stays with at least 1 International Classification of Diseases, 10th Revision (ICD-10) code corresponding to the studied diseases and at least 1 recorded hospital discharge summary. A final study cohort of 18,604 stays (6891 patients) was created, with 103 randomly selected discharge summaries annotated for training and validation purposes. NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68704_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>End-to-End Pipeline</title>
        <sec>
          <title>Overview</title>
          <p>The system presented in this work required 4 NLP tasks: (1) NER: this task identified and classified entities of interest mentioned in the text into predefined categories. The possible categories included drug name, drug strength, drug dosage, drug form, laboratory test name, and complete laboratory test. (2) Qualification: this task involved assigning predefined qualifiers to the recognized named entities. Only entities classified as “drug name” by the NER algorithm were qualified. There were 4 qualifiers [<xref ref-type="bibr" rid="ref52">52</xref>]: action (start, stop, increase, decrease, unique dose, and unknown), temporality (present, past, and future), certainty (certain, hypothetical, and conditional), as well as negation (true and false). (3) Measurement extraction: this task extracted and standardized the value and unit contained in the “complete laboratory test” entities detected by the NER algorithm. (4) Normalization: this task assigned predefined standard concepts to the recognized named entities. Each entity classified as “drug name” by the NER algorithm was assigned a code from the ATC classification system [<xref ref-type="bibr" rid="ref37">37</xref>]. Each entity classified as “laboratory test name” by the NER algorithm was assigned a CUI of the UMLS [<xref ref-type="bibr" rid="ref20">20</xref>] restricted to the laboratory procedure semantic type and the SNOMED CT US edition vocabulary [<xref ref-type="bibr" rid="ref36">36</xref>]. As described in <xref rid="figure3" ref-type="fig">Figures 3</xref> and <xref rid="figure4" ref-type="fig">4</xref>, the laboratory test pipeline and the drug pipeline involved both NER and normalization, while measurement extraction only concerned the laboratory test pipeline and qualification only concerned the drug pipeline.</p>
          <p>All the work presented in this paper was programmed in Python. Tabular data were processed with Spark (version 2.4.8) and distributed over 160 central processing units in parallel. This computing process is scalable over a large amount of data. Then, the cohorts were analyzed using Pandas (version 1.3.5). Inference and training of the NLP algorithms have been achieved on a V100 graphics processing unit. The code developed to run the experiments is freely available in a GitHub repository: Aremaki/BioMedics [<xref ref-type="bibr" rid="ref53">53</xref>]. The code makes extensive use of EDS-NLP (version 0.13.0) [<xref ref-type="bibr" rid="ref54">54</xref>], a collaborative NLP framework that aims primarily at building hybrid multitask NLP pipelines and extracting information from French clinical notes. It has also been made publicly available under an open-source license (BSD 3-clause): aphp/edsnlp.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Diagram of the laboratory test pipeline. It takes raw text as input, which is processed by 3 algorithms in total. It starts with the extraction and classification of relevant terms into 2 categories: laboratory test name and complete laboratory test. Then, the measurements associated with the complete laboratory tests are extracted and standardized into 2 components: value and unit. Finally, the extracted laboratory test names are normalized to the concept unique identifiers (CUIs) of the Unified Medical Language System (UMLS). HBV: hepatitis B virus; HCV: hepatitis C virus.</p>
            </caption>
            <graphic xlink:href="medinform_v13i1e68704_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Diagram of the drug pipeline. It takes raw text as input, which is processed by 3 algorithms in total. It starts with the extraction and classification of relevant terms into 4 categories: name, strength, dosage, and form. At the same time, drugs are also qualified with several possible values: action (start, stop, increase, decrease, unique dose, and unknown), temporality (present, past, and future), certainty (certain, hypothetical, and conditional), and negation (true and false). Then, the extracted drugs are normalized according to the Anatomical Therapeutic Chemical (ATC) classification system. Cp: capsule.</p>
            </caption>
            <graphic xlink:href="medinform_v13i1e68704_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>NER and Qualification Tasks</title>
          <p>In the NER and qualification step, we experimented with different methods: a rule-based method as a preliminary approach, using the terms provided by the standard terminologies directly for exact matching. The ATC classification system [<xref ref-type="bibr" rid="ref37">37</xref>] was used for drugs, and the SNOMED CT US edition vocabulary [<xref ref-type="bibr" rid="ref36">36</xref>] was used for laboratory tests. A detailed description of the dictionaries is provided in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <p>As a second approach, we experimented with a deep neural network architecture, described in <xref rid="figure5" ref-type="fig">Figure 5</xref>. The model consists of 2 BERT encoders [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref55">55</xref>] followed by 2 convolutional neural networks [<xref ref-type="bibr" rid="ref56">56</xref>]. The first one is followed by a conditional random field decoder [<xref ref-type="bibr" rid="ref15">15</xref>] and a softmax block, which outputs probability vectors based on the beginning, inside, outside, unit, and last tagging scheme [<xref ref-type="bibr" rid="ref14">14</xref>] to perform NER. The second is followed by a mean pooling layer and a softmax block to perform entity qualification. Several pretrained language models such as CamemBERT-EDS [<xref ref-type="bibr" rid="ref45">45</xref>], CamemBERT-base [<xref ref-type="bibr" rid="ref46">46</xref>], CamemBERT-bio [<xref ref-type="bibr" rid="ref47">47</xref>], and DrBERT [<xref ref-type="bibr" rid="ref48">48</xref>] have been compared.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Diagram of the named entity recognition and qualification architecture for laboratory test and drug entities. BERT: bidirectional encoder representations from transformers; CNN: convolutional neural network; CRF: conditional random field.</p>
            </caption>
            <graphic xlink:href="medinform_v13i1e68704_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>To select the best approach, precision, recall, and <italic>F</italic><sub>1</sub>-score were evaluated on 20 AP-HP discharge summaries. The performance of the rule-based method is presented in Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, and the performance of the neural network method of each pretrained language model is presented in Tables S4 and S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The neural network approach using CamemBERT-EDS [<xref ref-type="bibr" rid="ref45">45</xref>] was selected as the final model because it demonstrated superior performance compared to the other methods. Parameters of the architecture and fine-tuning are outlined in Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        </sec>
        <sec>
          <title>Measurement Extraction Task</title>
          <p>Extraction and standardization of the numerical value and unit were carried out on the outputs of the NER step, which extracts the complete laboratory test entity from the text in a single block (laboratory test name, numerical value, and unit). The extraction and standardization were achieved with a rule-based algorithm using regular expressions. The algorithm steps are described in <xref rid="figure6" ref-type="fig">Figure 6</xref>: (1) the laboratory test names were removed from the complete laboratory test entity, (2) regular expressions were designed to extract the numerical or qualitative value and the unit, and (3) qualitative values (eg, “positive,” “negative,” or “normal”) were standardized into graded numbers (1.0, 0.0, or 0.5), while units were converted to conventional standards.</p>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Diagram of the laboratory test measurement extraction process. HBV: hepatitis B virus; HCV: hepatitis C virus.</p>
            </caption>
            <graphic xlink:href="medinform_v13i1e68704_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Normalization Task</title>
          <p>The aim of the normalization step was to determine the standard code for each detected entity classified as “laboratory test name” and “drug name.” As described in <xref rid="figure7" ref-type="fig">Figure 7</xref>, for a given detected term, similarity scores were computed for all terms of a knowledge dictionary. The resulting standard code corresponded to the term with the highest similarity score. For drugs, the knowledge dictionary is an aggregation of 2 open-source dictionaries of drug names with their corresponding ATC codes: the UMLS [<xref ref-type="bibr" rid="ref20">20</xref>], restricted to the French ATC vocabulary [<xref ref-type="bibr" rid="ref37">37</xref>], and the Unique Drug Interoperability Repository created by the French National Agency for Medicines and Health Products Safety [<xref ref-type="bibr" rid="ref57">57</xref>]. For laboratory tests, the knowledge dictionary consists of all the French and English synonyms of the UMLS [<xref ref-type="bibr" rid="ref20">20</xref>] restricted to the laboratory procedure semantic type and the SNOMED CT US edition vocabulary [<xref ref-type="bibr" rid="ref36">36</xref>]. We experimented with 2 types of score computation: (1) fuzzy matching methods that directly compared word characters: Jaro-Winkler distance [<xref ref-type="bibr" rid="ref49">49</xref>] and Levenshtein distance [<xref ref-type="bibr" rid="ref50">50</xref>], as well as (2) neural network–based methods that compute cosine similarity scores between the embeddings of the words: CODER-all [<xref ref-type="bibr" rid="ref33">33</xref>] and SapBERT-all [<xref ref-type="bibr" rid="ref32">32</xref>]. Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> presents the performance of each method. The neural-based method with CODER-all seems to be significantly better for laboratory tests. However, for drugs, there is no significantly better solution, so we used the Jaro-Winkler distance [<xref ref-type="bibr" rid="ref49">49</xref>] method because it is less computationally expensive.</p>
          <fig id="figure7" position="float">
            <label>Figure 7</label>
            <caption>
              <p>Diagram of the normalization process. In this example, the best match of “paracetomol,” written with a typo, is “paracetamol,” which provides the correct Anatomical Therapeutic Chemical (ATC) code.</p>
            </caption>
            <graphic xlink:href="medinform_v13i1e68704_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The research protocol for this project was approved in 2020 by the institutional review board of Assistance Publique – Hôpitaux de Paris (AP-HP) (20-93). All data used in this study were collected as part of routine medical care, and their use for research purposes falls under the ethical guidelines of the institutional review board. All patient data were pseudonymized to ensure privacy and comply with data protection regulations. No financial compensation was provided, as the study relied solely on retrospective data from electronic health records.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Dataset Description</title>
        <p>The number of patients, hospitalizations, and discharge summaries for each disease of the study cohort are given in <xref ref-type="table" rid="table1">Table 1</xref>. The age distribution and the distribution of admission start dates for each disease are presented in Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Data description of the study cohort. Number of patients, hospitalizations, and discharge summaries for each studied disease. The number of discharge summaries is higher than the number of hospitalizations, as patients may change departments several times during the same stay (eg, be transferred to an intensive care unit, etc).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="190"/>
            <col width="270"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Disease</td>
                <td>Number of patients</td>
                <td>Number of hospitalizations</td>
                <td>Number of discharge summaries</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Antiphospholipid syndrome</td>
                <td>1059</td>
                <td>1818</td>
                <td>2380</td>
              </tr>
              <tr valign="top">
                <td>Lupus</td>
                <td>4102</td>
                <td>10,445</td>
                <td>12,500</td>
              </tr>
              <tr valign="top">
                <td>Systemic sclerosis</td>
                <td>2031</td>
                <td>6455</td>
                <td>7585</td>
              </tr>
              <tr valign="top">
                <td>Takayasu arteritis</td>
                <td>252</td>
                <td>833</td>
                <td>965</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>We performed various analyses on the extracted data combined with the structured data from the cohort. The results are reported in 2 sections. The first section presents the performance of the NLP algorithms on the annotated subset of 103 discharge summaries. The second section is about the application of these algorithms to the 22,194 discharge summaries included in the study cohort. It describes the contribution of unstructured data to structured data.</p>
      </sec>
      <sec>
        <title>NLP Performance</title>
        <sec>
          <title>Overview</title>
          <p>The performance of the model was evaluated on 4 main tasks: NER, qualification, measurement extraction, and normalization. This evaluation was conducted using 2 datasets: 103 manually annotated discharge summaries from the study cohort and the publicly available Quaero FrenchMed corpus [<xref ref-type="bibr" rid="ref51">51</xref>]. Performance metrics, including precision, recall, and <italic>F</italic><sub>1</sub>-score, were calculated and reported along with 95% CIs derived using the empirical bootstrap method at the discharge summary level [<xref ref-type="bibr" rid="ref58">58</xref>]. It is important to note that while NER and qualification required annotated data for both training and testing, measurement extraction and normalization only required annotated data for testing. Therefore, all 103 discharge summaries were annotated for NER and qualification, with 83 (80.6%) randomly selected for training and the remaining 20 (19.4%) used for testing. These 20 (19.4%) test documents were further annotated for measurement extraction. Of these 20 documents, 11 (10.7%) were additionally annotated for normalization, resulting in 668 annotated entities, which was a more time-consuming process.</p>
        </sec>
        <sec>
          <title>NER Task</title>
          <p>Our model was evaluated for the NER task on both the AP-HP annotated discharge summaries and the Quaero FrenchMed corpus [<xref ref-type="bibr" rid="ref51">51</xref>]. The results on our annotated dataset containing 103 discharge summaries from AP-HP are presented in <xref ref-type="table" rid="table2">Table 2</xref>. Precision, recall, and <italic>F</italic><sub>1</sub>-score were measured in a “strict” way, that is, a true positive result was obtained when a predicted entity and a gold entity had the exact same boundaries and label. The model achieved an overall <italic>F</italic><sub>1</sub>-score of 88.8. The results on the Quaero FrenchMed corpus [<xref ref-type="bibr" rid="ref51">51</xref>] are presented in Tables S8 and S9 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The model achieved an overall <italic>F</italic><sub>1</sub>-score of 66.2 for the MEDLINE corpus and 71.71 for the European Medicines Agency corpus.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Performance of the model for named entity recognition on University Hospitals of Greater Paris (Assistance Publique-Hôpitaux de Paris) discharge summaries. The model was trained on 83 annotated discharge summaries and tested on 20 discharge summaries. Each result was bootstrapped by discharge summary to provide a 95% CI given inside the brackets.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="230"/>
              <col width="220"/>
              <col width="190"/>
              <col width="190"/>
              <col width="170"/>
              <thead>
                <tr valign="bottom">
                  <td>Label</td>
                  <td>Number of entities (95% CI)</td>
                  <td>Precision (95% CI)</td>
                  <td>Recall (95% CI)</td>
                  <td><italic>F</italic><sub>1</sub>-score (95% CI)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Laboratory test name</td>
                  <td>1292 (1010-1603)</td>
                  <td>90.6 (88.5-92.7)</td>
                  <td>89.9 (87.5-92.2)</td>
                  <td>90.3 (88.2-92.3)</td>
                </tr>
                <tr valign="top">
                  <td>Complete laboratory test</td>
                  <td>1041 (793-1323)</td>
                  <td>86.2 (84.3-88.3)</td>
                  <td>83.6 (79.1-87.5)</td>
                  <td>84.9 (82.1-87.5)</td>
                </tr>
                <tr valign="top">
                  <td>Drug name</td>
                  <td>585 (454-731)</td>
                  <td>90.8 (85.8-95.4)</td>
                  <td>92.5 (88.4-95.7)</td>
                  <td>91.6 (87.5-95.1)</td>
                </tr>
                <tr valign="top">
                  <td>Drug dosage</td>
                  <td>276 (194-368)</td>
                  <td>89.1 (85.4-94.1)</td>
                  <td>86.2 (81.7-90.8)</td>
                  <td>87.7 (84.0-92.1)</td>
                </tr>
                <tr valign="top">
                  <td>Drug form</td>
                  <td>170 (106-247)</td>
                  <td>86.5 (78.8-93.4)</td>
                  <td>94.1 (91.5-97.6)</td>
                  <td>90.1 (85.6-94.0)</td>
                </tr>
                <tr valign="top">
                  <td>Drug strength</td>
                  <td>130 (77-196)</td>
                  <td>93.1 (87.7-97.0)</td>
                  <td>93.8 (89.2-97.6)</td>
                  <td>93.5 (89.3-96.6)</td>
                </tr>
                <tr valign="top">
                  <td>Overall</td>
                  <td>3494 (2885-4194)</td>
                  <td>89.1 (87.5-90.8)</td>
                  <td>88.5 (86.4-90.5)</td>
                  <td>88.8 (87.1-90.5)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Qualification Task</title>
          <p>The model was evaluated for the qualification task on our annotated dataset containing 103 discharge summaries from AP-HP. The results are presented in Table S10 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. On the test set, the model achieved an overall <italic>F</italic><sub>1</sub>-score of 78.8.</p>
        </sec>
        <sec>
          <title>Measurement Extraction Task</title>
          <p>The model was evaluated for the measurement extraction task on 20 annotated discharge summaries from AP-HP. Precision, recall, and <italic>F</italic><sub>1</sub>-score are presented in Table S11 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and were measured in a “strict” way: a true positive result was obtained when a predicted measurement and a gold measurement had the same value and unit. The algorithm obtained an <italic>F</italic><sub>1</sub>-score of 96.7.</p>
        </sec>
        <sec>
          <title>Normalization Task</title>
          <p>The rule-based algorithm for drug name normalization and the deep learning algorithm for the laboratory test name normalization were both evaluated on 11 annotated discharge summaries from AP-HP, 3 documents from the European Medicines Agency, and 833 titles from MEDLINE [<xref ref-type="bibr" rid="ref51">51</xref>]. For the evaluation, a true positive result was obtained when the predicted code of an entity was part of the list of annotated gold standard codes. Precision, recall, and <italic>F</italic><sub>1</sub>-score are presented in Table S12 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. On our dataset of discharge summaries, the rule-based method for drug names achieved an <italic>F</italic><sub>1</sub>-score of 92.9 and the neural network–based method used for laboratory tests achieved an <italic>F</italic><sub>1</sub>-score of 82.2.</p>
        </sec>
        <sec>
          <title>End-to-End Pipeline</title>
          <p>The model was evaluated on the NER and normalization tasks using 11 annotated discharge summaries from AP-HP. Precision, recall, and <italic>F</italic><sub>1</sub>-score were measured in a “strict” way: a true positive result was obtained when a predicted entity and a gold entity had exactly the same boundaries and label and the predicted standard code of the entity was part of the list of annotated gold standard codes. Precision, recall, and <italic>F</italic><sub>1</sub>-score are presented in <xref ref-type="table" rid="table3">Table 3</xref>. We obtained an <italic>F</italic><sub>1</sub>-score of 71.1 for laboratory tests and 89.3 for drug names.</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Performance of the models for named entity recognition and normalization tasks on University Hospitals of Greater Paris (Assistance Publique-Hôpitaux de Paris) discharge summaries. The model was tested on 11 discharge summaries. Each result was bootstrapped by discharge summary to provide a 95% CI given inside the brackets.</p>
            </caption>
            <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
              <col width="200"/>
              <col width="240"/>
              <col width="180"/>
              <col width="180"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td>Label</td>
                  <td>Number of entities (95% CI)</td>
                  <td>Precision (95% CI)</td>
                  <td>Recall (95% CI)</td>
                  <td><italic>F</italic><sub>1</sub>-score (95% CI)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Laboratory test name</td>
                  <td>356 (204-548)</td>
                  <td>72.0 (65.3-79.1)</td>
                  <td>70.2 (59.7-77.9)</td>
                  <td>71.1 (63.6-77.8)</td>
                </tr>
                <tr valign="top">
                  <td>Drug name</td>
                  <td>312 (206-424)</td>
                  <td>91.9 (88.9-93.5)</td>
                  <td>86.9 (82.9-90.0)</td>
                  <td>89.3 (85.9-91.6)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Clinical Application</title>
        <p>For each studied disease (SLE, systemic sclerosis, APS, and TA), each studied antibody, and each studied drug treatment, we reported the number of patients for whom we extracted a positive antibody or a prescribed drug treatment from both the structured and unstructured data of the study cohort. Lists of CUI codes for the studied antibodies and ATC codes for drug treatments are available in Tables S9 and S10 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. We were particularly interested in the number of patients for whom we extracted positive antibodies or drug treatments from the unstructured data of their EHRs that were not available in the structured data. In the analysis of the unstructured data, only entities explicitly qualified as “certain” and not negated were retained.</p>
        <sec>
          <title>Laboratory Test Results</title>
          <p><xref ref-type="table" rid="table4">Table 4</xref> describes the number of antibody-positive patients for each disease, where positivity was determined either by test values exceeding reference ranges or by explicit interpretation of the test as positive. The first column shows the number of patients for whom we extracted at least 1 positive test from the structured data of their EHR. The second column shows the number of patients for whom we extracted at least 1 positive test from both the structured and unstructured data. The third column shows the number of patients for whom we extracted at least 1 positive test from the unstructured data, but only among patients without positive tests extracted from the structured data.</p>
          <p>To ensure the reliability of the detected autoantibodies across the entire study cohort, we conducted a second evaluation focused specifically on the studied autoantibodies. While the precision was reported in <xref ref-type="table" rid="table3">Table 3</xref> as 72% in the general case for all laboratory tests, this additional analysis aimed to ensure comparable performance for the autoantibodies studied. For this evaluation, we randomly selected 10 positively detected entities for each studied autoantibody, yielding a total of 110 entities. These entities, identified through NER, measurement extraction, and normalization, were reviewed by a clinician. The review identified 3 errors, resulting in a precision of 97.3%.</p>
          <p>Finally, to identify potential errors made by the algorithm, we examined EHR cases in which a positive autoantibody appeared in the structured data but was not detected in the unstructured data. For each antibody, we randomly selected 10 EHRs (for some antibodies, fewer than 10 documents met this criterion, so we included all available cases). A clinician reviewed 63 discharge summaries from the EHRs and identified 2 types of algorithmic errors: (1) in 23 (36.5%) summaries, the algorithm either failed to detect the relevant antibody or did not normalize it correctly; and (2) in the remaining 40 (63.5%) summaries, either the antibody was not mentioned in the text, or the mention was interpreted as a negative result by the clinician.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Number of patients with positive antibodies mentioned in the text and in structured data.</p>
            </caption>
            <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
              <col width="30"/>
              <col width="310"/>
              <col width="220"/>
              <col width="220"/>
              <col width="220"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Disease and laboratory test</td>
                  <td colspan="3">Number of patients with positive tests (ratio)</td>
                </tr>
                <tr valign="top">
                  <td colspan="2"/>
                  <td>Structured data only, n (%)</td>
                  <td>Structured and unstructured data, n (%)</td>
                  <td>Benefits of the unstructured data, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Antiphospholipid syndrome (1059 patients)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Anticardiolipin antibody</td>
                  <td>184 (17.37)</td>
                  <td>478 (45.14)</td>
                  <td>294 (27.76)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Anti-B2GP1 antibody</td>
                  <td>103 (9.73)</td>
                  <td>334 (31.54)</td>
                  <td>231 (21.81)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Lupus anticoagulant</td>
                  <td>277 (26.16)</td>
                  <td>423 (39.94)</td>
                  <td>146 (13.79)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Systemic lupus erythematosus (4102 patients)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Antinuclear antibody</td>
                  <td>752 (18.33)</td>
                  <td>2949 (71.89)</td>
                  <td>2197 (53.56)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Anti-DNA antibodies</td>
                  <td>541 (13.19)</td>
                  <td>2174 (53)</td>
                  <td>1633 (39.81)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Anti-Smith antibodies</td>
                  <td>255 (6.22)</td>
                  <td>858 (20.92)</td>
                  <td>603 (14.7)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Systemic sclerosis (2031 patients)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Anti-RNA polymerase III antibody</td>
                  <td>39 (1.92)</td>
                  <td>156 (7.68)</td>
                  <td>117 (5.76)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Anti-Scl-70 antibodies</td>
                  <td>97 (4.78)</td>
                  <td>568 (27.97)</td>
                  <td>471 (23.19)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Anticentromere antibody</td>
                  <td>144 (7.09)</td>
                  <td>609 (29.99)</td>
                  <td>465 (22.9)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Drug Treatments</title>
          <p><xref ref-type="table" rid="table5">Table 5</xref> describes the number of patients with drug treatments for each of the studied drugs related to the respective disease. The first column presents the number of patients for whom we extracted at least 1 drug treatment from the structured data of their EHR. The second column presents the number of patients for whom we extracted at least 1 drug treatment from both the structured and the unstructured data. The third column provides the number of patients for whom we extracted at least 1 drug treatment from the unstructured data but only among the patients without drug treatment extracted from the structured data.</p>
          <table-wrap position="float" id="table5">
            <label>Table 5</label>
            <caption>
              <p>Number of patients with drug treatments. It describes the number of patients with drug treatments for each of the studied drugs related to the respective disease, extracted from structured and unstructured data. The proportions in percentage of the total number of patients diagnosed with the respective disease are given inside parentheses.</p>
            </caption>
            <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
              <col width="30"/>
              <col width="310"/>
              <col width="220"/>
              <col width="220"/>
              <col width="220"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Diseases and drugs</td>
                  <td colspan="3">Number of patients with drug treatments (ratio)</td>
                </tr>
                <tr valign="top">
                  <td colspan="2"/>
                  <td>Structured data only, n (%)</td>
                  <td>Structured and unstructured data, n (%)</td>
                  <td>Benefits of the unstructured data, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Antiphospholipid syndrome (1059 patients)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>VKA</td>
                  <td>186 (17.56)</td>
                  <td>628 (59.3)</td>
                  <td>442 (41.74)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Heparin</td>
                  <td>238 (22.47)</td>
                  <td>677 (63.94)</td>
                  <td>439 (41.46)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Oral anticoagulant</td>
                  <td>47 (4.44)</td>
                  <td>177 (16.72)</td>
                  <td>130 (12.28)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Systemic lupus erythematosus (4102 patients)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Systemic glucocorticoids</td>
                  <td>950 (23.16)</td>
                  <td>3308 (80.64)</td>
                  <td>2358 (57.49)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Cyclophosphamide</td>
                  <td>64 (1.56)</td>
                  <td>894 (21.79)</td>
                  <td>830 (20.23)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Mycophenolate mofetil</td>
                  <td>301 (7.34)</td>
                  <td>1263 (30.78)</td>
                  <td>962 (23.46)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Rituximab</td>
                  <td>75 (1.83)</td>
                  <td>709 (17.28)</td>
                  <td>634 (15.46)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Belimumab</td>
                  <td>43 (1.05)</td>
                  <td>247 (6.02)</td>
                  <td>204 (4.97)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Methotrexate</td>
                  <td>112 (2.73)</td>
                  <td>963 (23.48)</td>
                  <td>851 (20.75)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Hydroxychloroquine</td>
                  <td>920 (22.43)</td>
                  <td>3520 (85.83)</td>
                  <td>2600 (63.4)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Prevenar 13 vaccine</td>
                  <td>122 (2.97)</td>
                  <td>984 (23.99)</td>
                  <td>862 (21.02)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Pneumovax vaccine</td>
                  <td>43 (1.05)</td>
                  <td>436 (10.63)</td>
                  <td>393 (9.58)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Influenza vaccine</td>
                  <td>96 (2.34)</td>
                  <td>577 (14.07)</td>
                  <td>481 (11.73)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Systemic sclerosis (2031 patients)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Systemic glucocorticoids</td>
                  <td>258 (12.71)</td>
                  <td>1260 (62.06)</td>
                  <td>1002 (49.33)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Cyclophosphamide</td>
                  <td>6 (0.3)</td>
                  <td>390 (19.2)</td>
                  <td>384 (18.91)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Mycophenolate mofetil</td>
                  <td>88 (4.33)</td>
                  <td>463 (22.81)</td>
                  <td>375 (18.47)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Rituximab</td>
                  <td>13 (0.64)</td>
                  <td>258 (12.71)</td>
                  <td>245 (12.07)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Methotrexate</td>
                  <td>66 (3.25)</td>
                  <td>541 (26.63)</td>
                  <td>475 (23.39)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Prevenar 13 vaccine</td>
                  <td>59 (2.9)</td>
                  <td>545 (26.84)</td>
                  <td>486 (23.93)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Pneumovax vaccine</td>
                  <td>13 (0.64)</td>
                  <td>285 (14.03)</td>
                  <td>272 (13.4)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Influenza vaccine</td>
                  <td>42 (2.07)</td>
                  <td>425 (20.93)</td>
                  <td>383 (18.87)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Takayasu arteritis (252 patients)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Systemic glucocorticoids</td>
                  <td>68 (27)</td>
                  <td>223 (88.49)</td>
                  <td>155 (61.51)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Cyclophosphamide</td>
                  <td>0 (0)</td>
                  <td>18 (7.14)</td>
                  <td>18 (7.14)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Tocilizumab</td>
                  <td>15 (5.95)</td>
                  <td>47 (18.65)</td>
                  <td>32 (12.7)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Mycophenolate mofetil</td>
                  <td>9 (3.57)</td>
                  <td>21 (8.33)</td>
                  <td>12 (4.76)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Rituximab</td>
                  <td>0 (0)</td>
                  <td>6 (2.38)</td>
                  <td>6 (2.38)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Methotrexate</td>
                  <td>20 (7.94)</td>
                  <td>135 (53.57)</td>
                  <td>115 (45.63)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Prevenar 13 vaccine</td>
                  <td>9 (3.57)</td>
                  <td>83 (32.94)</td>
                  <td>74 (29.37)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Pneumovax vaccine</td>
                  <td>3 (1.19)</td>
                  <td>48 (19.05)</td>
                  <td>45 (17.86)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Influenza vaccine</td>
                  <td>6 (2.38)</td>
                  <td>41 (16.27)</td>
                  <td>35 (13.89)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>To ensure the precision of the drug treatments identified by the algorithm, we conducted a second evaluation specifically focused on the studied drugs. Although the general precision for all drugs had previously been reported as 91.9% in <xref ref-type="table" rid="table3">Table 3</xref>, this additional analysis aimed to confirm comparable performance for the specific drug treatments studied. For this evaluation, we randomly selected 10 positively detected entities per studied drug (spanning NER and normalization), resulting in a total of 130 entities. A clinician reviewed these entities individually and found no errors, corresponding to a precision of 100%.</p>
        </sec>
        <sec>
          <title>Inference Time and Carbon Footprint</title>
          <p>When considering scaling these methods to a CDW that may process hundreds of thousands of documents daily, it is critical to evaluate both processing speed and environmental impact. The entire NLP process on the 22,194 documents took 145 minutes on a graphics processing unit (Tesla V100-SXM2-32GB) and resulted in a total emission of 0.39 kg of CO<sub>2</sub> equivalent.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this paper, we proposed a novel block-based algorithm for extracting and normalizing medical data from text, enabling fine-grained phenotyping of patients with autoimmune or autoinflammatory diseases. We demonstrated that these cascading algorithms significantly improve patient characterization compared to relying only on structured data. In addition, we provided detailed results for every step of the algorithm (NER, qualification, measurement extraction, and normalization), evaluated our method using a publicly available dataset, Quaero [<xref ref-type="bibr" rid="ref51">51</xref>], and provided a comprehensive performance comparison between models.</p>
        <p>Our work offers several strengths. Notably, we leveraged state-of-the-art language models, particularly the BERT model, for named entity extraction. Indeed, when compared with recent large language models, such as GPT, BERT models remain the most effective for the NER task [<xref ref-type="bibr" rid="ref58">58</xref>]. We evaluated and compared several language models and various methods for each step, demonstrating strong performance outcomes. The model evaluated for the NER task on 20 annotated discharge summaries achieved high <italic>F</italic><sub>1</sub>-scores: 90.3 for laboratory test names and 91.6 for drug names. Similarly, the model achieved high <italic>F</italic><sub>1</sub>-scores for the qualification task, the measurement extraction task, and the overall end-to-end task. A posteriori precision analysis also showed very good results (97.3% for laboratory tests and 100% for drugs). Finally, as shown in <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>, our study highlights significant improvements in information availability by enriching structured data with information extracted from unstructured data.</p>
        <p>Beyond these results, our findings are consistent with those of previous studies. For instance, 71.87% (2949/4102) of patients in the lupus cohort exhibited positive antinuclear antibodies (≥1/80), a finding that aligns with the clinical criteria for the disease [<xref ref-type="bibr" rid="ref40">40</xref>]. Similarly, when compared with previous data [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], 85.81% (3520/4102) of patients with SLE were treated with hydroxychloroquine, and 80.64% (3308/4102) received corticosteroids during hospitalization. For comparison, a recent conference abstract by Eviatar et al [<xref ref-type="bibr" rid="ref59">59</xref>] reported that 81% of patients were treated with hydroxychloroquine, 65% with systemic corticosteroids, and 55% with immunosuppressants (2259/4102, 55.07% in our study). In addition, 64.4% (682/1059) of patients with APS had at least one positive antibody assay. For patients with TA, the treatments were consistent with national recommendations [<xref ref-type="bibr" rid="ref60">60</xref>], with 88.5% (223/252) of patients receiving systemic corticosteroids and 18.7% (47/252) treated with tocilizumab.</p>
        <p>The clinical implications of algorithms that enable accurate patient phenotyping are substantial. They facilitate more precise recruitment of patients for studies, particularly therapeutic trials, and support clinical practice by addressing key questions, such as, “What happened to a patient like mine?” Prototypes are currently under development to construct cohorts of patients with similar characteristics to a specific individual under care, using information extracted from hospital reports. The algorithm we present can identify patients with comparable immune profiles (eg, matching positive antibodies) and analyze the treatments they received, offering valuable insights for personalized care.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>However, there are several limitations to our study. A significant limitation lies in the complexity of standardizing laboratory tests, especially for tests with abbreviated terms. For instance, the glomerular filtration rate (or “DFG” in French) is not directly classified as a biological test in the SNOMED CT US edition [<xref ref-type="bibr" rid="ref36">36</xref>], making it challenging to standardize. Similarly, the abbreviation “ACC” for lupus circulating anticoagulant is missing in the UMLS [<xref ref-type="bibr" rid="ref20">20</xref>], which makes normalization difficult and partially explains the lower contribution of text-based analysis for this assay. In general, drug names are often written in a relatively standardized format in texts (using either trade names or generics), whereas the terminology for describing biological data tends to be more varied. For example, a clinician might describe “hemoglobin” using variations, such as “anemia at 9g/dL,” “Hb=9g/dL,” or “hemoglobin at 9,” among others. This variability complicates the normalization process for laboratory tests, leading to poorer performance compared with that of drug treatments. Another limitation is the relatively small evaluation sample size. Our NLP end-to-end system was evaluated on only 11 annotated clinical documents, comprising 668 annotated entities. This limited dataset is a consequence of the labor-intensive process involved in manually annotating CUIs and ATC codes, which constrained the number of documents we could feasibly annotate. Also, interannotator agreement could not be computed due to having a single annotator involved in the annotation process. To minimize potential biases, several precautions were taken. First, an expert clinician performed the annotations following strict guidelines, while the model was independently designed by a separate researcher. Second, the training and test datasets were created using distinct discharge summaries from different patients. These precautions reduce the risk of information leakage during model evaluation.</p>
        <p>Finally, it is important to note that this study relies on the secondary use of “real-life” health care data. While clinical texts are central to characterizing patients, as demonstrated, they do not comprehensively capture all patient characteristics. Our error analysis revealed that for patients with both textual information and biological test results from the same hospitalization, 63% (40/63) of the biological tests were either not mentioned in the text or were interpreted by the clinician as negative results. To enhance the accuracy of patient phenotyping, we believe it is essential to incorporate both structured and textual data.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>We acknowledge that the analyses presented here are preliminary for each pathology, and we anticipate more detailed future work in this area. Particularly, it will be necessary to establish a precise relationship between target organ damage and antibody positivity, some of which are known to be more specific for certain types of damage. For example, anti-RNA polymerase III antibodies are more often associated with scleroderma renal crisis [<xref ref-type="bibr" rid="ref61">61</xref>], and triple positivity of APS antibodies is also a poor prognostic marker. The type and severity of organ damage should also be considered in conjunction with treatment options. These analyses will also be based on our current patient phenotyping work [<xref ref-type="bibr" rid="ref62">62</xref>]. The dosages associated with each treatment have not yet been analyzed either, but work is in progress for this future step. Another direction is adapting our methodology to other languages. While the current implementation is tailored for French, the approach can be generalized by substituting the pretrained clinical BERT model with other language-specific alternatives, such as models pretrained for Spanish [<xref ref-type="bibr" rid="ref63">63</xref>] or English [<xref ref-type="bibr" rid="ref64">64</xref>]. However, successful adaptation would require annotated datasets specific to the new language, as well as adjustments to the terminology and clinical standards used in the target CDW. Beyond linguistic adaptability, the methods described could also be extended to unstructured data in different formats, such as imaging. Addressing these directions could advance this research toward a more comprehensive, multilingual, and multiformat phenotyping framework.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>To the best of our knowledge, this is the first study to automatically analyze such a large volume of patients with autoimmune diseases using data derived directly from text. We believe that this finer, text-based characterization of patients in the context of rare diseases could enable researchers to target these patients more effectively and help clinicians synthesize the information relevant to their management.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Annotation guidelines, supplementary tables, and figure.</p>
        <media xlink:href="medinform_v13i1e68704_app1.docx" xlink:title="DOCX File, 571 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AP-HP</term>
          <def>
            <p>University Hospitals of Greater Paris (Assistance Publique-Hôpitaux de Paris)</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">APS</term>
          <def>
            <p>antiphospholipid syndrome</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ATC</term>
          <def>
            <p>Anatomical Therapeutic Chemical</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CDW</term>
          <def>
            <p>clinical data warehouse</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CUI</term>
          <def>
            <p>concept unique identifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ICD-10</term>
          <def>
            <p>International Classification of Diseases, Tenth Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SLE</term>
          <def>
            <p>systemic lupus erythematosus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SNOMED CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine Clinical Terms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">TA</term>
          <def>
            <p>Takayasu arteritis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank the clinical data warehouse of the Greater Paris University Hospitals for its support and the realization of data management and data curation tasks. The authors express sincere thanks to Dr Arthur Mageau, Prof Karim Sacré, and Prof Olivier Steichen for their careful review of this manuscript.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The datasets generated or analyzed during this study are not publicly available due to their classification as sensitive data under the General Data Protection Regulation and the National Data Protection Commission. Access to the data requires prior approval from the local institutional review board and must follow the process outlined on its website [<xref ref-type="bibr" rid="ref65">65</xref>]. In the case of non-University Hospitals of Greater Paris (Assistance Publique-Hôpitaux de Paris) researchers, the signature of a collaboration contract is mandatory.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richesson</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Hammond</surname>
              <given-names>WE</given-names>
            </name>
            <name name-style="western">
              <surname>Nahm</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wixted</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Bauck</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Cifelli</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Smerek</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Dickerson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laws</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Madigan</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Rusincovitch</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Kluchar</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Califf</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Electronic health records based phenotyping in next-generation clinical trials: a perspective from the NIH Health Care Systems Collaboratory</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <month>12</month>
          <day>01</day>
          <volume>20</volume>
          <issue>e2</issue>
          <fpage>e226</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23956018"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001926</pub-id>
          <pub-id pub-id-type="medline">23956018</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001926</pub-id>
          <pub-id pub-id-type="pmcid">PMC3861929</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gombar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Califf</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Harrington</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>It is time to learn from patients like mine</article-title>
          <source>NPJ Digit Med</source>
          <year>2019</year>
          <month>03</month>
          <day>19</day>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>16</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-019-0091-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-019-0091-3</pub-id>
          <pub-id pub-id-type="medline">31304364</pub-id>
          <pub-id pub-id-type="pii">91</pub-id>
          <pub-id pub-id-type="pmcid">PMC6550176</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Polony</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Posada</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Banda</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Gombar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>ACE: the Advanced Cohort Engine for searching longitudinal patient records</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>07</month>
          <day>14</day>
          <volume>28</volume>
          <issue>7</issue>
          <fpage>1468</fpage>
          <lpage>79</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33712854"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab027</pub-id>
          <pub-id pub-id-type="medline">33712854</pub-id>
          <pub-id pub-id-type="pii">6169466</pub-id>
          <pub-id pub-id-type="pmcid">PMC8279796</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frankovich</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Longhurst</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Sutherland</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Evidence-based medicine in the EMR era</article-title>
          <source>N Engl J Med</source>
          <year>2011</year>
          <month>11</month>
          <day>10</day>
          <volume>365</volume>
          <issue>19</issue>
          <fpage>1758</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1056/nejmp1108726</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ackerson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sy</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Daily</surname>
              <given-names>LI</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ku</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>HF</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing versus diagnosis code-based methods for postherpetic neuralgia identification: algorithm development and validation</article-title>
          <source>JMIR Med Inform</source>
          <year>2024</year>
          <month>09</month>
          <day>10</day>
          <volume>12</volume>
          <fpage>e57949</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2024/1/e57949/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/57949</pub-id>
          <pub-id pub-id-type="medline">39254589</pub-id>
          <pub-id pub-id-type="pii">v12i1e57949</pub-id>
          <pub-id pub-id-type="pmcid">PMC11407135</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elkin</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Mullin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mardekian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Crowner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sakilay</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sinha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Brady</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nolen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Trainer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Koppel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schlegel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kaushik</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Anand</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Using artificial intelligence with natural language processing to combine electronic health record's structured and free text data to identify nonvalvular atrial fibrillation to decrease strokes and death: evaluation and case-control study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>11</month>
          <day>09</day>
          <volume>23</volume>
          <issue>11</issue>
          <fpage>e28946</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/11/e28946/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/28946</pub-id>
          <pub-id pub-id-type="medline">34751659</pub-id>
          <pub-id pub-id-type="pii">v23i11e28946</pub-id>
          <pub-id pub-id-type="pmcid">PMC8663460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seinen</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Fridgeirsson</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jeannetot</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>John</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Kors</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Markus</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Pera</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rekkas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>van Mulligen</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Rijnbeek</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Use of unstructured text in prognostic clinical prediction models: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2022</year>
          <month>06</month>
          <day>14</day>
          <volume>29</volume>
          <issue>7</issue>
          <fpage>1292</fpage>
          <lpage>302</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35475536"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocac058</pub-id>
          <pub-id pub-id-type="medline">35475536</pub-id>
          <pub-id pub-id-type="pii">6574714</pub-id>
          <pub-id pub-id-type="pmcid">PMC9196702</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khurshid</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Reeder</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Harrington</surname>
              <given-names>LX</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sarma</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Di Achille</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Diamant</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cunningham</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Haimovich</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Alusi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Klarqvist</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Ashburner</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Diedrich</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ghadessi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mielke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Eilken</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>McElhinney</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Derix</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Atlas</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ellinor</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Philippakis</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Batra</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lubitz</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Cohort design and natural language processing to reduce bias in electronic health records research</article-title>
          <source>NPJ Digit Med</source>
          <year>2022</year>
          <month>04</month>
          <day>08</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>47</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-022-00590-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-022-00590-0</pub-id>
          <pub-id pub-id-type="medline">35396454</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-022-00590-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC8993873</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Idnay</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ta</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Schelke</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Marder</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Mini-mental status examination phenotyping for Alzheimer's disease patients using both structured and narrative electronic health record features</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2025</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>119</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocae274</pub-id>
          <pub-id pub-id-type="medline">39520712</pub-id>
          <pub-id pub-id-type="pii">7888957</pub-id>
          <pub-id pub-id-type="pmcid">PMC11648712</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fraile Navarro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ijaz</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rezazadegan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rahimi-Ardabili</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dras</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Berkovsky</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Clinical named entity recognition and relation extraction using natural language processing of medical free text: a systematic review</article-title>
          <source>Int J Med Inform</source>
          <year>2023</year>
          <month>09</month>
          <volume>177</volume>
          <fpage>105122</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(23)00140-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2023.105122</pub-id>
          <pub-id pub-id-type="medline">37295138</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(23)00140-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moqurrab</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Ayub</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Anjum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Asghar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>An accurate deep learning model for clinical entity recognition from clinical notes</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2021</year>
          <month>10</month>
          <volume>25</volume>
          <issue>10</issue>
          <fpage>3804</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.1109/jbhi.2021.3099755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikheev</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Moens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grover</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Named Entity recognition without gazetteers</article-title>
          <source>Proceedings of the 9th conference on European chapter of the Association for Computational Linguistics</source>
          <year>1999</year>
          <conf-name>EACL '99</conf-name>
          <conf-date>June 8-12, 1999</conf-date>
          <conf-loc>Bergen, Norway</conf-loc>
          <fpage>1</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.3115/977035.977037"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/977035.977037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramshaw</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Church</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Isabelle</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Manzi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tzoukermann</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yarowsky</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Text chunking using transformation-based learning</article-title>
          <source>Natural Language Processing Using Very Large Corpora</source>
          <year>1995</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>157</fpage>
          <lpage>76</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ratinov</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Design challenges and misconceptions in named entity recognition</article-title>
          <source>Proceedings of the 13th Conference on Computational Natural Language Learning</source>
          <year>2009</year>
          <conf-name>CoNLL '09</conf-name>
          <conf-date>June 4-5, 2009</conf-date>
          <conf-loc>Boulder, Colorado</conf-loc>
          <fpage>147</fpage>
          <lpage>55</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/W09-1119"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/1596374.1596399</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lafferty</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>FC</given-names>
            </name>
          </person-group>
          <article-title>Conditional random fields: probabilistic models for segmenting and labeling sequence data</article-title>
          <source>Proceedings of the 18th International Conference on Machine Learning</source>
          <year>2001</year>
          <conf-name>ICML '01</conf-name>
          <conf-date>June 28-July 1, 2001</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <fpage>282</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/645530.655813"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/1015330.1015422</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lample</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ballesteros</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawakami</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dyer</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Neural architectures for named entity recognition</article-title>
          <source>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2016</year>
          <conf-name>NAACL '16</conf-name>
          <conf-date>June 12-17, 2016</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <fpage>260</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N16-1030.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/n16-1030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sung</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jeong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BERN2: an advanced neural biomedical named entity recognition and normalization tool</article-title>
          <source>Bioinformatics</source>
          <year>2022</year>
          <month>10</month>
          <day>14</day>
          <volume>38</volume>
          <issue>20</issue>
          <fpage>4837</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36053172"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btac598</pub-id>
          <pub-id pub-id-type="medline">36053172</pub-id>
          <pub-id pub-id-type="pii">6687126</pub-id>
          <pub-id pub-id-type="pmcid">PMC9563680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jonker</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Antunes</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Matos</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Multi-head CRF classifier for biomedical multi-class named entity recognition on Spanish clinical notes</article-title>
          <source>Database (Oxford)</source>
          <year>2024</year>
          <month>07</month>
          <day>30</day>
          <volume>2024</volume>
          <fpage>baae068</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/baae068"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/baae068</pub-id>
          <pub-id pub-id-type="medline">39083461</pub-id>
          <pub-id pub-id-type="pii">7724924</pub-id>
          <pub-id pub-id-type="pmcid">PMC11290360</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cardon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Grabar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Grouin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hamon</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Presentation of the assessment campaign DEFT 2020: textual similarity in open domain and extraction of accurate information in clinical cases (presentation of the DEFT 2020 challenge: open domain textual similarity and precise information extraction from clinical cases)</article-title>
          <source>Proceedings of the 6th joint conference Days of Studies on the Word (JEP, 33rd edition), Automatic Processing of Natural Languages (TALN, 27th edition), Meeting of Research Students in Computer Science for Automatic Language Processing (RECITAL, 22nd edition). Workshop Defi Fouille de Textes</source>
          <year>2020</year>
          <conf-name>JEP/TALN/RECITAL '20</conf-name>
          <conf-date>June 8-19, 2020</conf-date>
          <conf-loc>Nancy, France</conf-loc>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.jeptalnrecital-deft.1.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.4000/books.pufc.30067</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>Database issue</issue>
          <fpage>D267</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Irani</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Wehbe</surname>
              <given-names>FH</given-names>
            </name>
            <name name-style="western">
              <surname>Smithers</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Spickard</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The KnowledgeMap project: development of a concept-based medical school curriculum database</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2003</year>
          <volume>2003</volume>
          <fpage>195</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/14728161"/>
          </comment>
          <pub-id pub-id-type="medline">14728161</pub-id>
          <pub-id pub-id-type="pii">D030003640</pub-id>
          <pub-id pub-id-type="pmcid">PMC1480333</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Lang</surname>
              <given-names>FM</given-names>
            </name>
          </person-group>
          <article-title>An overview of MetaMap: historical perspective and recent advances</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>229</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20442139"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.002733</pub-id>
          <pub-id pub-id-type="medline">20442139</pub-id>
          <pub-id pub-id-type="pii">17/3/229</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shagina</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Socratous</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>A WEB-based version of MedLEE: a medical language extraction and encoding system</article-title>
          <source>Proc AMIA Annu Fall Symp</source>
          <year>1996</year>
          <volume>1996</volume>
          <fpage>938</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pmc.ncbi.nlm.nih.gov/articles/PMC2233000/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stenner</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Waitman</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>MedEx: a medication information extraction system for clinical narratives</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <month>01</month>
          <day>01</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1197/jamia.m3378</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>QT</given-names>
            </name>
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sordo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Extracting principal diagnosis, co-morbidity and smoking status for asthma research: evaluation of a natural language processing system</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2006</year>
          <month>07</month>
          <day>26</day>
          <volume>6</volume>
          <fpage>30</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-6-30"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1472-6947-6-30</pub-id>
          <pub-id pub-id-type="medline">16872495</pub-id>
          <pub-id pub-id-type="pii">1472-6947-6-30</pub-id>
          <pub-id pub-id-type="pmcid">PMC1553439</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ogren</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>507</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20819853"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
          <pub-id pub-id-type="medline">20819853</pub-id>
          <pub-id pub-id-type="pii">17/5/507</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bejan</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderwende</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wurfel</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Yetisgen-Yildiz</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Pneumonia identification using statistical feature selection</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <month>09</month>
          <day>01</day>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>817</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22539080"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000752</pub-id>
          <pub-id pub-id-type="medline">22539080</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000752</pub-id>
          <pub-id pub-id-type="pmcid">PMC3422830</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng-Treitler</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Raychaudhuri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Plenge</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Electronic medical records for discovery research in rheumatoid arthritis</article-title>
          <source>Arthritis Care Res (Hoboken)</source>
          <year>2010</year>
          <month>08</month>
          <volume>62</volume>
          <issue>8</issue>
          <fpage>1120</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20235204"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/acr.20184</pub-id>
          <pub-id pub-id-type="medline">20235204</pub-id>
          <pub-id pub-id-type="pmcid">PMC3121049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Eyler</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Naïve electronic health record phenotype identification for rheumatoid arthritis</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>189</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22195070"/>
          </comment>
          <pub-id pub-id-type="medline">22195070</pub-id>
          <pub-id pub-id-type="pmcid">PMC3243261</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2019</year>
          <conf-name>NAACL '19</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, Minnesota</conf-loc>
          <fpage>4171</fpage>
          <lpage>86</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N19-1423.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/n19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>French</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McInnes</surname>
              <given-names>BT</given-names>
            </name>
          </person-group>
          <article-title>An overview of biomedical entity linking throughout the years</article-title>
          <source>J Biomed Inform</source>
          <year>2023</year>
          <month>01</month>
          <volume>137</volume>
          <fpage>104252</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(22)00257-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2022.104252</pub-id>
          <pub-id pub-id-type="medline">36464228</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(22)00257-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC9845184</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Shareghi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Basaldella</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Collier</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Self-alignment pretraining for biomedical entity representations</article-title>
          <source>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2021</year>
          <conf-name>NAACL '21</conf-name>
          <conf-date>June 6-11, 2021</conf-date>
          <conf-loc>Virtual Event</conf-loc>
          <fpage>4228</fpage>
          <lpage>38</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2021.naacl-main.334.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2021.naacl-main.334</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>CODER: knowledge-infused cross-lingual medical term embedding for term normalization</article-title>
          <source>J Biomed Inform</source>
          <year>2022</year>
          <month>02</month>
          <volume>126</volume>
          <fpage>103983</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(21)00312-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103983</pub-id>
          <pub-id pub-id-type="medline">34990838</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(21)00312-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Achiam</surname>
              <given-names>OJ</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Akkaya</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Aleman</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Altenschmidt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Anadkat</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Avila</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Babuschkin</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Balaji</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Balcom</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Baltescu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bavarian</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Belgum</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bello</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Berdine</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bernadett-Shapiro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Berner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bogdonoff</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Boiko</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brakman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brockman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Brundage</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Button</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Carlson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Carmichael</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chantzis</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chess</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cummings</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Currier</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Decareaux</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Degry</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Deutsch</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Deville</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dhar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dohan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dunning</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ecoffet</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Eleti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Eloundou</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Farhi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Fedus</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Felix</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Fishman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Forte</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fulford</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Georges</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Goel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Gogineni</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Goh</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gontijo-Lopes</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grafstein</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Greene</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gross</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hallacy</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Heaton</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Heidecke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hickey</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hickey</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hoeschele</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Houghton</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huizinga</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jomoto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jonn</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Jun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kaftan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kamali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kanitscheider</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Keskar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kilpatrick</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kirchner</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kiros</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Knight</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kokotajlo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kondraciuk</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kondrich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Konstantinidis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kosic</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Lampe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lan</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Leike</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Litwin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lowe</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lue</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Makanju</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Malfacini</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Markov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Markovski</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mayne</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McGrew</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>McKinney</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McLeavey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McMillan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>McNeil</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Medina</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Menick</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Metz</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mishchenko</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mishkin</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Monaco</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Morikawa</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mossing</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Murati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Murk</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Mély</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nakano</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nayak</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Neelakantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ngo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Noh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ouyang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>O'Keefe</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pachocki</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Paino</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Palermo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pantuliano</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Parascandolo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Parish</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Parparita</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlov</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Perelman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Peres</surname>
              <given-names>FD</given-names>
            </name>
            <name name-style="western">
              <surname>Petrov</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto</surname>
              <given-names>HD</given-names>
            </name>
            <name name-style="western">
              <surname>Pokorny</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pokrass</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pong</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Power</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Power</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Proehl</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Puri</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rae</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ramesh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Raymond</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Real</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rimbach</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rotsted</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Roussez</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ryder</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Saltarelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sanders</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Santurkar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sastry</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schnurr</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schulman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Selsam</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sheppard</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sherbakov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Shieh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shoker</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shyam</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sidor</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sigler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Simens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sitkin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Slama</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sohl</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sokolowsky</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Staudacher</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Such</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Summers</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tezak</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tillet</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Tootoonchian</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tuggle</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Turley</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tworek</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Uribe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vallone</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vijayvergiya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Voss</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wainwright</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Weinmann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Welihinda</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Welinder</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wiethoff</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Willner</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Winter</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wolrich</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Workman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zaremba</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zellers</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuk</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zoph</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>GPT-4 technical report</article-title>
          <source>arXiv. Preprint posted online March 4, 2024</source>
          <year>2024</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2303.08774"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yeganova</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Comeau</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Islamaj</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kapoor</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Opportunities and challenges for ChatGPT and large language models in biomedicine and health</article-title>
          <source>Brief Bioinform</source>
          <year>2023</year>
          <month>11</month>
          <day>22</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>bbad493</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38168838"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbad493</pub-id>
          <pub-id pub-id-type="medline">38168838</pub-id>
          <pub-id pub-id-type="pii">7505071</pub-id>
          <pub-id pub-id-type="pmcid">PMC10762511</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <article-title>SNOMED CT</article-title>
          <source>US National Library of Medicine</source>
          <access-date>2024-07-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/healthit/snomedct/index.html">https://www.nlm.nih.gov/healthit/snomedct/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>Anatomical therapeutic chemical (ATC) classification</article-title>
          <source>World Health Organization</source>
          <access-date>2024-05-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/tools/atc-ddd-toolkit/atc-classification">https://www.who.int/tools/atc-ddd-toolkit/atc-classification</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fanouriakis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kostopoulou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alunno</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aringer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bajema</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Boletis</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Cervera</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Doria</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Govoni</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Houssiau</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jayne</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kouloumas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Larsen</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Lerstrøm</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moroni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mosca</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smolen</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Svenungsson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tesar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Tincani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Troldborg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van Vollenhoven</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wenzel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bertsias</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Boumpas</surname>
              <given-names>DT</given-names>
            </name>
          </person-group>
          <article-title>2019 update of the EULAR recommendations for the management of systemic lupus erythematosus</article-title>
          <source>Ann Rheum Dis</source>
          <year>2019</year>
          <month>06</month>
          <volume>78</volume>
          <issue>6</issue>
          <fpage>736</fpage>
          <lpage>45</lpage>
          <pub-id pub-id-type="doi">10.1136/annrheumdis-2019-215089</pub-id>
          <pub-id pub-id-type="medline">30926722</pub-id>
          <pub-id pub-id-type="pii">S0003-4967(24)00642-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aringer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Costenbader</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Daikh</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brinks</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mosca</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsey-Goldman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Smolen</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Wofsy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Boumpas</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Kamen</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Jayne</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cervera</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Costedoat-Chalumeau</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Diamond</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gladman</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hiepe</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khanna</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lerstrøm</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Massarotti</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McCune</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ruiz-Irastorza</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sanchez-Guerrero</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Urowitz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bertsias</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hoyer</surname>
              <given-names>BF</given-names>
            </name>
            <name name-style="western">
              <surname>Leuchten</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tani</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tedeschi</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Touma</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Schmajuk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Anic</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Assan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Crow</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Czirják</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Doria</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Graninger</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Halda-Kiss</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hasni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Izmirly</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kumánovics</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mariette</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Padjen</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Pego-Reigosa</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Romero-Diaz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rúa-Figueroa Fernández</surname>
              <given-names>Í</given-names>
            </name>
            <name name-style="western">
              <surname>Seror</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stummvoll</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tektonidou</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Vasconcelos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vital</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Yavuz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Meroni</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Fritzler</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Naden</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dörner</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>2019 European League Against Rheumatism/American College of Rheumatology classification criteria for systemic lupus erythematosus</article-title>
          <source>Arthritis Rheumatol</source>
          <year>2019</year>
          <month>09</month>
          <day>06</day>
          <volume>71</volume>
          <issue>9</issue>
          <fpage>1400</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/2027.42/151247"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/art.40930</pub-id>
          <pub-id pub-id-type="medline">31385462</pub-id>
          <pub-id pub-id-type="pmcid">PMC6827566</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <article-title>Lupus Systémique de l'adulte et de l'enfant</article-title>
          <source>Haute Autorité de Santé</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.has-sante.fr/jcms/p_3493410/fr/lupus-systemique-de-l-adulte-et-de-l-enfant">https://www.has-sante.fr/jcms/p_3493410/fr/lupus-systemique-de-l-adulte-et-de-l-enfant</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <article-title>Sclérodermie Systémique</article-title>
          <source>Haute Autorité de Santé</source>
          <year>2018</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.has-sante.fr/jcms/c_717292/fr/sclerodermie-systemique">https://www.has-sante.fr/jcms/c_717292/fr/sclerodermie-systemique</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <article-title>Syndrome des Anti-Phospholipides de l’adulte et de l’enfant</article-title>
          <source>Haute Autorité de Santé</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.has-sante.fr/jcms/p_3375791/fr/syndrome-des-anti-phospholipides-de-l-adulte-et-de-l-enfant">https://www.has-sante.fr/jcms/p_3375791/fr/syndrome-des-anti-phospholipides-de-l-adulte-et-de-l-enfant</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <article-title>i2b2: informatics for integrating biology and the bedside</article-title>
          <source>i2b2</source>
          <access-date>2024-04-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.i2b2.org/">https://www.i2b2.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mahajan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tsou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>Ö</given-names>
            </name>
          </person-group>
          <article-title>Overview of the 2022 n2c2 shared task on contextualized medication event extraction in clinical notes</article-title>
          <source>J Biomed Inform</source>
          <year>2023</year>
          <month>08</month>
          <volume>144</volume>
          <fpage>104432</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(23)00153-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2023.104432</pub-id>
          <pub-id pub-id-type="medline">37356640</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(23)00153-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC10529825</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dura</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Jean</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tannier</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Calliger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bey</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Neuraz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Flicoteaux</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Learning structures of the French clinical language: development and validation of word embedding models using 21 million clinical reports from electronic health records</article-title>
          <source>arXiv. Preprint posted online July 26, 2022</source>
          <year>2022</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2207.12940"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2207.12940</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Muller</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Suárez</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dupont</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Romary</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>de la Clergerie</surname>
              <given-names>ÉV</given-names>
            </name>
            <name name-style="western">
              <surname>Seddah</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sagot</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>CamemBERT: a tasty French language model</article-title>
          <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2020</year>
          <conf-name>ACL '20</conf-name>
          <conf-date>July 5-10, 2020</conf-date>
          <conf-loc>Virtual Event</conf-loc>
          <fpage>7203</fpage>
          <lpage>19</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.acl-main.645.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.645</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Touchent</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Romary</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>de la Clergerie</surname>
              <given-names>É</given-names>
            </name>
          </person-group>
          <article-title>CamemBERT-bio: leveraging continual pre-training for cost-effective models on French biomedical data</article-title>
          <source>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation</source>
          <year>2024</year>
          <conf-name>LREC-COLING '24</conf-name>
          <conf-date>May 20-25, 2024</conf-date>
          <conf-loc>Torino, Italia</conf-loc>
          <fpage>2692</fpage>
          <lpage>701</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2024.lrec-main.241.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Labrak</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bazoge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dufour</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rouvier</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Morin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Daille</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gourraud</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>DrBERT: a robust pre-trained model in French for biomedical and clinical domains</article-title>
          <source>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics</source>
          <year>2023</year>
          <conf-name>ACL '23</conf-name>
          <conf-date>July 9-14, 2023</conf-date>
          <conf-loc>Toronto, Canada</conf-loc>
          <fpage>16207</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2023.acl-long.896.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2023.acl-long.896</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Winkler</surname>
              <given-names>WE</given-names>
            </name>
          </person-group>
          <article-title>String comparator metrics and enhanced decision rules in the Fellegi-Sunter model of record linkage</article-title>
          <source>Bureau of the Census</source>
          <year>1990</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://files.eric.ed.gov/fulltext/ED325505.pdf">https://files.eric.ed.gov/fulltext/ED325505.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levenshtein</surname>
              <given-names>VI</given-names>
            </name>
          </person-group>
          <article-title>Binary codes capable of correcting deletions, insertions, and reversals</article-title>
          <source>Sov Phys Dokl</source>
          <year>1965</year>
          <volume>10</volume>
          <issue>8</issue>
          <fpage>707</fpage>
          <lpage>10</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grouin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Leixa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Névéol</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rosset</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tannier</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zweigenbaum</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The Quaero French Medical Corpus: a ressource for medical entity recognition and normalization</article-title>
          <source>Papers with Code</source>
          <access-date>2024-03-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://paperswithcode.com/paper/the-quaero-french-medical-corpus-a-ressource">https://paperswithcode.com/paper/the-quaero-french-medical-corpus-a-ressource</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mahajan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tsou</surname>
              <given-names>CH</given-names>
            </name>
          </person-group>
          <article-title>Toward understanding clinical context of medication change events in clinical narratives</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2021</year>
          <volume>2021</volume>
          <fpage>833</fpage>
          <lpage>42</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35308981"/>
          </comment>
          <pub-id pub-id-type="medline">35308981</pub-id>
          <pub-id pub-id-type="pii">3577060</pub-id>
          <pub-id pub-id-type="pmcid">PMC8861744</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Remaki</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>BioMedics</article-title>
          <source>Zenodo</source>
          <year>2022</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://zenodo.org/records/13838918">https://zenodo.org/records/13838918</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wajsburt</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Petit-Jean</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dura</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jean</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bey</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>EDS-NLP: efficient information extraction from French clinical notes</article-title>
          <source>Zenodo</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://zenodo.org/records/11238626">https://zenodo.org/records/11238626</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>Ł</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>Proceedings of the 31st International Conference on Neural Information Processing Systems</source>
          <year>2017</year>
          <conf-name>NIPS '17</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA</conf-loc>
          <fpage>6000</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3295222.3295349"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abas</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Elhenawy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zidan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Othman</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>BERT-CNN: a deep learning model for detecting emotions from text</article-title>
          <source>Comput Mater Contin</source>
          <year>2021</year>
          <volume>71</volume>
          <issue>2</issue>
          <fpage>2943</fpage>
          <lpage>61</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sciencedirect.com/org/science/article/pii/S1546221821001314"/>
          </comment>
          <pub-id pub-id-type="doi">10.32604/cmc.2022.021671</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="web">
          <article-title>Catalogue des terminologies</article-title>
          <source>Ministère du Travail, de la Santé et des Solidarités &#38; ANS</source>
          <access-date>2025-01-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://smt.esante.gouv.fr/catalogue-des-terminologies/">https://smt.esante.gouv.fr/catalogue-des-terminologies/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dekking</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Kraaikamp</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lopuhaä</surname>
              <given-names>HP</given-names>
            </name>
            <name name-style="western">
              <surname>Meester</surname>
              <given-names>LE</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Dekking</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Kraaikamp</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lopuhaä</surname>
              <given-names>HP</given-names>
            </name>
            <name name-style="western">
              <surname>Meester</surname>
              <given-names>LE</given-names>
            </name>
          </person-group>
          <article-title>The bootstrap</article-title>
          <source>A Modern Introduction to Probability and Statistics: Understanding Why and How</source>
          <year>2005</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>269</fpage>
          <lpage>84</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eviatar</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yahalom</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Livnat</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Elboim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Elkayam</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Chodick</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenberg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Paran</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Real-world treatment patterns in patients with systemic lupus erythematosus: associations with comorbidities and damage</article-title>
          <source>Lupus Sci Med</source>
          <year>2024</year>
          <month>09</month>
          <day>24</day>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>e001266</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://lupus.bmj.com/lookup/pmidlookup?view=long&#38;pmid=39317452"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/lupus-2024-001266</pub-id>
          <pub-id pub-id-type="medline">39317452</pub-id>
          <pub-id pub-id-type="pii">11/2/e001266</pub-id>
          <pub-id pub-id-type="pmcid">PMC11423723</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="web">
          <article-title>Artérite de Takayasu</article-title>
          <source>Haute Autorité de Santé</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.has-sante.fr/jcms/p_3148994/fr/arterite-de-takayasu">https://www.has-sante.fr/jcms/p_3148994/fr/arterite-de-takayasu</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mouthon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bussone</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Berezné</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Noël</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Guillevin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Scleroderma renal crisis</article-title>
          <source>J Rheumatol</source>
          <year>2014</year>
          <month>06</month>
          <volume>41</volume>
          <issue>6</issue>
          <fpage>1040</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.3899/jrheum.131210</pub-id>
          <pub-id pub-id-type="medline">24833760</pub-id>
          <pub-id pub-id-type="pii">jrheum.131210</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gérardin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mageau</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mékinian</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tannier</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Carrat</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Construction of cohorts of similar patients from automatic extraction of medical concepts: phenotype extraction study</article-title>
          <source>JMIR Med Inform</source>
          <year>2022</year>
          <month>12</month>
          <day>19</day>
          <volume>10</volume>
          <issue>12</issue>
          <fpage>e42379</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2022/12/e42379/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/42379</pub-id>
          <pub-id pub-id-type="medline">36534446</pub-id>
          <pub-id pub-id-type="pii">v10i12e42379</pub-id>
          <pub-id pub-id-type="pmcid">PMC9808583</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carrino</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Llop</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pàmies</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gutiérrez-Fandiño</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Armengol-Estapé</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Silveira-Ocampo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Valencia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Agirre</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Villegas</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Pretrained biomedical language models for clinical NLP in Spanish</article-title>
          <source>Proceedings of the 21st Workshop on Biomedical Language Processing</source>
          <year>2022</year>
          <conf-name>BioNLP '22</conf-name>
          <conf-date>May 26, 2022</conf-date>
          <conf-loc>Dublin, Ireland</conf-loc>
          <fpage>193</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2022.bionlp-1.19.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2022.bionlp-1.19</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="web">
          <article-title>Entrepôt de Données de Santé</article-title>
          <source>Assistance Publique – Hôpitaux de Paris</source>
          <access-date>2025-01-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://eds.aphp.fr/">https://eds.aphp.fr/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
