<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">49607</article-id><article-id pub-id-type="doi">10.2196/49607</article-id><title-group><article-title>Impact of Translation on Biomedical Information Extraction: Experiment on Real-Life Clinical Notes</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>G&#x00E9;rardin</surname><given-names>Christel</given-names></name><degrees>MSc, MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Xiong</surname><given-names>Yuhan</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wajsb&#x00FC;rt</surname><given-names>Perceval</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Carrat</surname><given-names>Fabrice</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Tannier</surname><given-names>Xavier</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib></contrib-group><aff id="aff1"><institution>Institut Pierre Louis d'Epid&#x00E9;miologie et de Sant&#x00E9; Publique, Sorbonne Universit&#x00E9;, Institut National de la Sant&#x00E9; et de la Recherche M&#x00E9;dicale</institution>, <addr-line>Paris</addr-line>, <country>France</country></aff><aff id="aff2"><institution>Shanghai Jiaotong University</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff><aff id="aff3"><institution>Innovation and Data Unit, Assistance Publique H&#x00F4;pitaux de Paris</institution>, <addr-line>Paris</addr-line>, <country>France</country></aff><aff id="aff4"><institution>Department of Public Health, Assistance Publique H&#x00F4;pitaux de Paris, H&#x00F4;pital Saint-Antoine</institution>, <addr-line>Paris</addr-line>, <country>France</country></aff><aff id="aff5"><institution>Sorbonne Universit&#x00E9;, Institut National de la Sant&#x00E9; et de la Recherche M&#x00E9;dicale, Universit&#x00E9; Sorbonne Paris-Nord, Laboratoire d'Informatique M&#x00E9;dicale et de Connaissance en e-Sant&#x00E9;</institution>, <addr-line>Paris</addr-line>, <country>France</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Lovis</surname><given-names>Christian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Modersohn</surname><given-names>Luise</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Torii</surname><given-names>Manabu</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Christel G&#x00E9;rardin, MSc, MD<email>christel.ducroz-gerardin@iplesp.upmc.fr</email></corresp></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date 
pub-type="epub"><day>4</day><month>4</month><year>2024</year></pub-date><volume>12</volume><elocation-id>e49607</elocation-id><history><date date-type="received"><day>03</day><month>06</month><year>2023</year></date><date date-type="rev-recd"><day>07</day><month>01</month><year>2024</year></date><date date-type="accepted"><day>10</day><month>01</month><year>2024</year></date></history><copyright-statement>&#x00A9; Christel G&#x00E9;rardin, Yuhan Xiong, Perceval Wajsb&#x00FC;rt, Fabrice Carrat, Xavier Tannier. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 4.4.2024. </copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2024/1/e49607"/><abstract><sec><title>Background</title><p>Biomedical natural language processing tasks are best performed with English models, and translation tools have undergone major improvements. 
On the other hand, building annotated biomedical data sets remains a challenge.</p></sec><sec><title>Objective</title><p>The aim of our study is to determine whether the use of English tools to extract and normalize French medical concepts based on translations provides comparable performance to that of French models trained on a set of annotated French clinical notes.</p></sec><sec sec-type="methods"><title>Methods</title><p>We compared 2 methods: 1 involving French-language models and 1 involving English-language models. For the native French method, the named entity recognition and normalization steps were performed separately. For the translated English method, after the first translation step, we compared a 2-step method and a terminology-oriented method that performs extraction and normalization at the same time. We used French, English, and bilingual annotated data sets to evaluate all stages (named entity recognition, normalization, and translation) of our algorithms.</p></sec><sec sec-type="results"><title>Results</title><p>The native French method outperformed the translated English method, with an overall <italic>F</italic><sub>1</sub>-score of 0.51 (95% CI 0.47-0.55), compared with 0.39 (95% CI 0.34-0.44) and 0.38 (95% CI 0.36-0.40) for the 2 English methods tested.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Despite recent improvements in translation models, there is a significant difference in performance between the 2 approaches in favor of the native French method, which is more effective on French medical texts, even with few annotated documents.</p></sec></abstract><kwd-group><kwd>concept normalization</kwd><kwd>named entity recognition</kwd><kwd>natural language processing</kwd><kwd>translation</kwd><kwd>translational tool</kwd><kwd>biomedical data set</kwd><kwd>bilingual language model</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Named entity recognition (NER) and 
term normalization are important steps in biomedical natural language processing (NLP). NER is used to extract key information from textual medical reports, and normalization consists of matching a specific term to its formal reference in a shared terminology such as the Unified Medical Language System (UMLS) Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>]. Major improvements have been made recently in these areas, particularly for English, as a huge amount of data is available in the literature and resources. Modern automatic language processing relies heavily on pretrained language models, which enable efficient semantic representation of texts. The development of algorithms such as transformers [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>] has led to significant progress in this field.</p><p>In <xref ref-type="fig" rid="figure1">Figure 1</xref>, the term &#x201C;mention level&#x201D; indicates that the analysis is carried out at the level of a word or small group of words: first at the NER stage (in blue) and then during normalization (in green); finally, all mentions with normalized concept unique identifiers (CUIs) are aggregated at the &#x201C;document level&#x201D; (orange part). The sets of aggregated CUIs per document predicted by the native French and translated English approaches are then compared to the manually annotated gold standard.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Overall objective of the method: translating plain text to the CUI codes of the UMLS Metathesaurus, document by document. 
CHEM: Chemicals &#x0026; Drugs; CUI: concept unique identifier; DISO: Disorders; PROC: Procedures; UMLS: Unified Medical Language System.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e49607_fig01.png"/></fig><p>In many languages other than English, efforts remain to be made to obtain such results, notably due to a much smaller quantity of accessible data [<xref ref-type="bibr" rid="ref4">4</xref>]. In this context, our work explores the relevance of a translation step for the recognition and normalization of medical concepts in French biomedical documents. We compared 2 methods: (1) a native French approach where only annotated documents and resources in French are used and (2) a translation-based approach where documents are translated into English, in order to take advantage of existing tools and resources for this language that would allow the extraction of concepts mentioned in unpublished French texts without new training data (zero-shot), as proposed in van Mulligen et al [<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>We evaluated and discussed the results on several French biomedical corpora, including a new set of 42 annotated hospitalization reports with 4 entity groups. We evaluated the normalization task at the document level, in order to avoid a cross-language alignment step at evaluation time, which would add a potential level of error and thus make the results more difficult to interpret (see word alignment in Gao and Vogel [<xref ref-type="bibr" rid="ref6">6</xref>] and Vogel et al [<xref ref-type="bibr" rid="ref7">7</xref>]). This normalization was carried out by mapping all terms to their CUI in the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>]. <xref ref-type="fig" rid="figure1">Figure 1</xref> summarizes these various stages, from the raw French text and the translated English text to the aggregation and comparison of CUIs at the document level. 
Our code is available on GitHub [<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>The various stages of our algorithms rely heavily on transformer language models [<xref ref-type="bibr" rid="ref2">2</xref>]. These models currently represent the state of the art for many NLP tasks, such as machine translation, NER, classification, and text normalization (also known as entity linking). Once trained, these models can represent any specific language, such as biomedical or legal. The power of these models comes from their neural architecture but also largely depends on the amount of data they are trained on. In the biomedical field, 2 main types of data are available: public articles (eg, PubMed) and clinical electronic medical record databases (eg, MIMIC-III [<xref ref-type="bibr" rid="ref9">9</xref>]), and the most powerful models are, for example, BioBERT [<xref ref-type="bibr" rid="ref10">10</xref>], which has been trained on the whole of PubMed in English, and ClinicalBERT [<xref ref-type="bibr" rid="ref11">11</xref>], which has been trained on PubMed and MIMIC-III. In French, the variety of models is less extensive, with CamemBERT [<xref ref-type="bibr" rid="ref12">12</xref>] and FlauBERT [<xref ref-type="bibr" rid="ref13">13</xref>] for the general domain and no specific model available for the biomedical domain.</p><p>In <xref ref-type="fig" rid="figure2">Figure 2</xref>, axis 1 (green axis on the left) corresponds to the native French branch with a NER step based on a FastText model trained from scratch on French clinical notes and a CamemBERT model. A multilingual Bidirectional Encoder Representations From Transformers (BERT) model was then used for the normalization step, with 2 models tested: a deep multilingual normalization model [<xref ref-type="bibr" rid="ref14">14</xref>] and CODER [<xref ref-type="bibr" rid="ref15">15</xref>] with the full version. 
Axes 2.1 and 2.2 (the 2 purple axes on the right) correspond to the translated English branches, with a first translation step performed by the OPUS-MT-FR-EN model [<xref ref-type="bibr" rid="ref16">16</xref>] for both. Axis 2.1 (left) was conducted with decoupled NER and normalization steps: FastText trained from PubMed and MIMIC-III [<xref ref-type="bibr" rid="ref17">17</xref>] for NER, and deep multilingual normalization [<xref ref-type="bibr" rid="ref14">14</xref>] or CODER [<xref ref-type="bibr" rid="ref15">15</xref>] with the English version for normalization. Axis 2.2 (right) used a single system for the NER and normalization stages: MedCAT [<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>In addition to particularly powerful English-language pretrained models, universal biomedical terminologies (ie, the UMLS Metathesaurus) also contain many more English terms than other languages. For example, the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>] contains at least 10 times more English terms than French terms, which may enable rule-based models to perform better in English. As mentioned above, each reference concept in the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>] is assigned a CUI, associated with a set of synonyms, possibly in several languages, and a semantic group, such as <italic>Disorders</italic>, <italic>Chemicals &#x0026; Drugs</italic>, <italic>Procedure</italic>, <italic>Anatomy</italic>, etc.</p><p>In parallel, the performance of machine translation has also improved thanks to the same type of transformer-based language models, and recent years have seen the emergence of high-quality machine translations, such as OPUS-MT developed by Tiedemann et al [<xref ref-type="bibr" rid="ref16">16</xref>], Google Translate, and others. 
These 2 observations have led several research teams to add a translation step in order to analyze medical texts, for example, to extract relevant mentions in ultrasound reports [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>] or in the case of the standardization of medical concepts [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Work in the general (nonmedical) domain has also focused on alignment between named entities in parallel bilingual texts [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>].</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Diagram of different experiments comparing French and English language models without and with intermediate translation steps. CHEM: Chemicals &#x0026; Drugs; CUI: concept unique identifier; DEVI: Devices; DISO: Disorders; EHR: electronic health record; EN: English; FR: French; FT: fine-tuned; PROC: Procedures; UMLS: Unified Medical Language System.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e49607_fig02.png"/></fig></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Approaches</title><sec id="s2-1-1"><title>Overview</title><p><xref ref-type="fig" rid="figure2">Figure 2</xref> shows the main approaches and models used in our study. We explored 1 &#x201C;native French approach axis&#x201D; (axis 1 in <xref ref-type="fig" rid="figure2">Figure 2</xref>), based on French linguistic models learned from and applied to French annotated data, and 2 &#x201C;translated English approach axes&#x201D; (axes 2.1 and 2.2), based on a translation step and concept extraction tools in English. 
We compared the performance of all axes with the average of the document-level CUI prediction precisions for all documents.</p></sec><sec id="s2-1-2"><title>Native French Approach</title><p>Axis 1 consisted of 2 stages: a NER stage and a normalization stage. For the NER stage, we used the nested NER algorithm. Next, a normalization step was performed by 2 different algorithms: a deep multilingual normalization model [<xref ref-type="bibr" rid="ref14">14</xref>] and CODER [<xref ref-type="bibr" rid="ref15">15</xref>] with the <italic>CODER all</italic> version.</p></sec><sec id="s2-1-3"><title>Translated-English Approach</title><p>First, axes 2.1 and 2.2 consisted of a translation step, performed by the state-of-the-art OPUS-MT-FR-EN [<xref ref-type="bibr" rid="ref16">16</xref>] or Google Translate algorithm. Second, similar to axis 1, axis 2.1 was based on a NER step and a normalization step. The NER step was performed by the same algorithm but trained on the National NLP Clinical Challenges (N2C2) 2019 data set [<xref ref-type="bibr" rid="ref24">24</xref>] without manual annotation realignment; for the normalization step, we used the same deep multilingual algorithm [<xref ref-type="bibr" rid="ref14">14</xref>] and the English version of CODER [<xref ref-type="bibr" rid="ref15">15</xref>] based on a BioBERT model [<xref ref-type="bibr" rid="ref10">10</xref>]. This axis allows us to compare 2 methods whose difference lies solely in the translation step.</p><p>Axis 2.2 was based on the MedCAT [<xref ref-type="bibr" rid="ref18">18</xref>] algorithm, which performs NER and normalization simultaneously. 
In this case, we compared the native French method with a state-of-the-art, ready-to-use English system, which is not available in French.</p></sec></sec><sec id="s2-2"><title>Data Sets</title><p>For all our experiments, we chose to focus on 4 semantic groups of the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>]: <italic>Chemical &#x0026; Drugs</italic> (&#x201C;CHEM&#x201D;); <italic>Devices</italic> (&#x201C;DEVI&#x201D;), corresponding to medical devices such as pacemakers, catheters, etc; <italic>Disorders</italic> (&#x201C;DISO&#x201D;), corresponding to all signs, symptoms, results (eg, positive or negative results of biological tests), and diseases; and <italic>Procedures</italic> (&#x201C;PROC&#x201D;), corresponding to all diagnostic and therapeutic procedures such as imaging, biological tests, operative procedures, etc, as well as the corresponding number of documents.</p><p><xref ref-type="table" rid="table1">Table 1</xref> shows the data sets used for all our experiments and the corresponding number of documents. First, 2 French data sets were used for the final evaluation, as well as for training the axis-1 models. QUAERO is a freely available corpus [<xref ref-type="bibr" rid="ref25">25</xref>] based on pharmacological notes with 2 subcorpora: MEDLINE (short sentences from PubMed abstracts) and EMEA (drug package inserts). We also annotated a new data set of real-life clinical notes from the Assistance Publique H&#x00F4;pitaux de Paris data warehouse, described in Section S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Overview of all data sets used. When a data set is used for both training and testing, 80% of the data set is used for training and 20% is used for testing. 
Thus, for the EMEA data set, 30 documents were used for training and 8 for testing, 34 French notes were used for training and 8 for testing, and so on.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Variables</td><td align="left" valign="bottom" colspan="7">Languages and data sets</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="3">French</td><td align="left" valign="bottom" colspan="2">English</td><td align="left" valign="bottom" colspan="2">English and French</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="2">QUAERO [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="bottom">French notes</td><td align="left" valign="bottom">N2C2<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> 2019 [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="bottom">Mantra [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="bottom">WMT<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> 2016 [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="char" char="." 
valign="bottom">WMT 2019 [<xref ref-type="bibr" rid="ref28">28</xref>]</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">EMEA</td><td align="left" valign="bottom">MEDLINE</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Type</td><td align="left" valign="top">Drug notices</td><td align="left" valign="top">MEDLINE titles</td><td align="left" valign="top">French notes</td><td align="left" valign="top">English notes</td><td align="left" valign="top">Drug notices and MEDLINE titles</td><td align="left" valign="top">PubMed abstracts</td><td align="left" valign="top">PubMed abstracts</td></tr><tr><td align="left" valign="top" colspan="2">Size (documents), n</td><td align="left" valign="top">38</td><td align="left" valign="top">2514</td><td align="left" valign="top">42</td><td align="left" valign="top">100</td><td align="left" valign="top">200</td><td align="left" valign="top">&#x003E;600,000 sent</td><td align="left" valign="top">6542</td></tr><tr><td align="left" valign="top" colspan="9"><bold>Use</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Train NER<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Test NER</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" 
valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Normalization</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Test MedCAT</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Translation (fine-tuning)</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Translation (test)</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2003;</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>N2C2: National Natural Language Processing Clinical Challenges.</p></fn><fn id="table1fn2"><p><sup>b</sup>WMT: Workshop on Machine Translation.</p></fn><fn id="table1fn3"><p><sup>c</sup>NER: named entity 
recognition.</p></fn></table-wrap-foot></table-wrap><p>Second, we used the N2C2 2019 corpus [<xref ref-type="bibr" rid="ref24">24</xref>] with annotated CUIs, on which we automatically added semantic group information from the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>], to train the axis-2.1 system and evaluate the NER and English normalization algorithms. We also used the Mantra data set [<xref ref-type="bibr" rid="ref26">26</xref>], a multilingual reference corpus for biomedical concept recognition.</p><p>Finally, we refined and tested the translation algorithms on the Workshop on Machine Translation biomedical corpora of 2016 [<xref ref-type="bibr" rid="ref27">27</xref>] and 2019 [<xref ref-type="bibr" rid="ref28">28</xref>]. A detailed description of the number of respective entities in the data sets can be found in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>The annotation methods for the French corpus are detailed in Section S1 and Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The distribution of entities for this annotation is detailed in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-3"><title>Translation</title><p>We used and compared 2 main algorithms for the translation step: the OPUS-MT-FR-EN model [<xref ref-type="bibr" rid="ref16">16</xref>], which we tested without and with <italic>fine-tuning</italic> on the 2 biomedical translation corpora of 2016 and 2019 [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], and Google Translate as a comparison model.</p></sec><sec id="s2-4"><title>NER Algorithm</title><p>For this step, we used the algorithm of Wajsb&#x00FC;rt [<xref ref-type="bibr" rid="ref29">29</xref>] described in G&#x00E9;rardin et al [<xref ref-type="bibr" rid="ref30">30</xref>]. 
This model is based on the representation of a BERT transformer [<xref ref-type="bibr" rid="ref3">3</xref>] and calculates the scores of all possible concepts to be predicted in the text. The extracted concepts are delimited by 3 values: start, end, and label. More precisely, the encoding of the text corresponds to the last 4 layers of BERT, a FastText embedding, and a max-pool Char-CNN [<xref ref-type="bibr" rid="ref31">31</xref>] representation of the word. The decoding step is then performed by a 3-layer long short-term memory [<xref ref-type="bibr" rid="ref32">32</xref>] with learning weights [<xref ref-type="bibr" rid="ref33">33</xref>], similar to the method in Yu et al [<xref ref-type="bibr" rid="ref34">34</xref>]. A sigmoid function was applied on top of the network. Values (start, end, and label) with a score greater than 0.5 were retained for prediction. The loss function was a binary cross-entropy, and we used the Adam optimizer [<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>In our experiments, for the native French axis (axis 1 in <xref ref-type="fig" rid="figure2">Figure 2</xref>), the pretrained embeddings used to train the model were based on a FastText model [<xref ref-type="bibr" rid="ref36">36</xref>], trained from scratch on 5 gigabytes of clinical text, and a CamemBERT-large model [<xref ref-type="bibr" rid="ref12">12</xref>] <italic>fine-tuned</italic> on this same data set. For English axis 2.1, the pretrained models were BioWordVec [<xref ref-type="bibr" rid="ref17">17</xref>] and ClinicalBERT [<xref ref-type="bibr" rid="ref11">11</xref>].</p></sec><sec id="s2-5"><title>Normalization Algorithms</title><sec id="s2-5-1"><title>Overview</title><p>This stage of our experiments was essential for comparing a method in native French and one translated into English, and it consisted of matching each mention extracted from the text to its associated CUI in the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>]. 
We compared 3 models for this step, described below: the deep multilingual normalization algorithm developed by Wajsb&#x00FC;rt et al [<xref ref-type="bibr" rid="ref14">14</xref>]; CODER [<xref ref-type="bibr" rid="ref15">15</xref>]; and the MedCAT [<xref ref-type="bibr" rid="ref18">18</xref>] model, which performs both NER and normalization.</p><p>These 3 models require no training data set other than the UMLS Metathesaurus.</p></sec><sec id="s2-5-2"><title>Deep Multilingual Normalization</title><p>This algorithm by Wajsb&#x00FC;rt et al [<xref ref-type="bibr" rid="ref14">14</xref>] considers the normalization task as a highly multiclass classification problem with cosine similarity and a softmax function as the last layer. The model is based on contextual integration, using the pretrained multilingual BERT model [<xref ref-type="bibr" rid="ref3">3</xref>], and works in 2 steps. In the first step, the BERT model is fine-tuned and the French UMLS terms and their corresponding English synonyms are learned. Then, in the second step, the BERT model is frozen and the representation of all English-only terms (ie, those present only in English in the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>]) is learned. The same training is used for the native French and translated English approaches. This model was trained with the 2021 version of the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>], corresponding to the version used for annotating the French corpus. The model was thus trained on over 4 million concepts corresponding to 2 million CUIs.</p></sec><sec id="s2-5-3"><title>CODER</title><p>The CODER algorithm [<xref ref-type="bibr" rid="ref15">15</xref>] was developed by contrastive learning on the basis of the medical knowledge graph of the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>], with concept similarities being calculated from the representation of terms and relations in this knowledge graph. 
Contrastive learning is used to learn embeddings through multisimilarity loss [<xref ref-type="bibr" rid="ref37">37</xref>]. The authors have developed 2 versions: a multilingual version based on the multilingual BERT [<xref ref-type="bibr" rid="ref3">3</xref>] and an English version based on the pretrained BioBERT model [<xref ref-type="bibr" rid="ref10">10</xref>]. We used the multilingual version for axis 1 (native French approach) and the English version for axis 2.1. Both types of this model (<italic>CODER all</italic> and <italic>CODER en</italic>) were trained with the 2020 version of UMLS (publicly available models). <italic>CODER all</italic> [<xref ref-type="bibr" rid="ref15">15</xref>] was trained on over 4 million concepts corresponding to 2 million CUIs, and <italic>CODER en</italic> was trained on over 3 million terms and 2 million CUIs.</p><p>For the deep multilingual model and the CODER model, in order to improve performance in terms of accuracy, we chose to add semantic group information (ie, <italic>Chemical &#x0026; Drugs</italic>, <italic>Devices</italic>, <italic>Disorders</italic>, and <italic>Procedures</italic>) to the model output: that is, from the first <italic>k</italic> CUIs chosen from a mention, we selected the first from the corresponding group.</p><p>The MedCAT algorithm is described in detail in Section S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec></sec><sec id="s2-6"><title>Ethical Considerations</title><p>The study and its experimental protocol were approved by the Assistance Publique H&#x00F4;pitaux de Paris Scientific and Ethical Committee (IRB00011591, decision CSE 20-0093). Patients were informed that their electronic health record information could be reused after an anonymization process, and those who objected to the reuse of their data were excluded. 
All methods were applied in accordance with the relevant guidelines (<italic>Commission nationale de l'informatique et des libert&#x00E9;s</italic> reference methodology MR-004 [<xref ref-type="bibr" rid="ref38">38</xref>]).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>The sections below present the performance results for each stage. The N2C2 2019 challenge corpus [<xref ref-type="bibr" rid="ref24">24</xref>] enabled us to evaluate the performance of our English models on clinical data, and the Biomedical Translation 2016 shared task [<xref ref-type="bibr" rid="ref27">27</xref>] allowed us to evaluate our translation performance on biomedical data with a BLEU score [<xref ref-type="bibr" rid="ref39">39</xref>].</p><sec id="s3-1"><title>NER Performances</title><p>To be able to compare our approaches in native French and translated English, we used the same NER model, trained and tested on each of the data sets described above. <xref ref-type="table" rid="table2">Table 2</xref> shows the corresponding results. Overall <italic>F</italic><sub>1</sub>-scores were similar across data sets: from 0.72 to 0.77.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Named entity recognition (NER) performance for each model. For all experiments, we used the same NER algorithm but with different pretrained models. 
The best performance values are italicized.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Groups</td><td align="left" valign="bottom" colspan="9">Data sets and models</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" rowspan="2" colspan="3">EMEA test, with FastText*<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> and CamemBERT-FT [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="bottom" rowspan="2" colspan="3">French notes, with FastText* and CamemBERT-FT</td><td align="left" valign="bottom" rowspan="2" colspan="3">N2C2<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> 2019 test, with BioWordVec [<xref ref-type="bibr" rid="ref17">17</xref>] and ClinicalBERT [<xref ref-type="bibr" rid="ref11">11</xref>]</td></tr><tr><td align="left" valign="bottom"/></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="top">CHEM<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">0.80</td><td align="left" valign="top">0.83</td><td align="left" valign="top">0.82</td><td align="left" valign="top">0.84</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.86</td><td align="left" valign="top">0.87</td><td align="left" valign="top">0.85</td><td align="left" valign="top">0.86</td></tr><tr><td align="left" valign="top">DEVI<sup><xref ref-type="table-fn" 
rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.42</td><td align="left" valign="top">0.81</td><td align="left" valign="top">0.55</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.58</td><td align="left" valign="top">0.51</td><td align="left" valign="top">0.54</td></tr><tr><td align="left" valign="top">DISO<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.54</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.59</td><td align="left" valign="top">0.67</td><td align="left" valign="top">0.65</td><td align="left" valign="top">0.66</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.73</td></tr><tr><td align="left" valign="top">PROC<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="top">0.73</td><td align="left" valign="top">0.78</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.78</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.80</td><td align="left" valign="top">0.78</td><td align="left" valign="top">0.79</td></tr><tr><td align="left" valign="top"><italic>Overall</italic></td><td align="left" valign="top"><italic>0.71</italic></td><td align="left" valign="top"><italic>0.77</italic></td><td align="left" valign="top"><italic>0.74</italic></td><td align="left" valign="top"><italic>0.73</italic></td><td align="left" valign="top"><italic>0.71</italic></td><td align="left" valign="top"><italic>0.72</italic></td><td align="left" valign="top"><italic>0.78</italic></td><td align="left" valign="top"><italic>0.76</italic></td><td align="left" valign="top"><italic>0.77</italic></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>FastText* corresponds to a FastText model [<xref 
ref-type="bibr" rid="ref36">36</xref>] trained from scratch on our clinical data set.</p></fn><fn id="table2fn2"><p><sup>b</sup>N2C2: National Natural Language Processing Clinical Challenges.</p></fn><fn id="table2fn3"><p><sup>c</sup>CHEM: Chemical &#x0026; Drugs.</p></fn><fn id="table2fn4"><p><sup>d</sup>DEVI: Devices.</p></fn><fn id="table2fn5"><p><sup>e</sup>DISO: Disorders.</p></fn><fn id="table2fn6"><p><sup>f</sup>PROC: Procedures.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Normalization Performances</title><p>This section presents only the normalization performance based on the gold standard&#x2019;s entity mentions, without the intermediate steps. The results are summarized in <xref ref-type="table" rid="table3">Table 3</xref>. The deep multilingual algorithm performed better for all corpora tested, with an improvement in <italic>F</italic><sub>1</sub>-score from +0.06 to +0.11. By way of comparison, the winning team of the 2019 N2C2 had achieved an accuracy of 0.85 using the N2C2 data set directly to train their algorithm [<xref ref-type="bibr" rid="ref24">24</xref>]. In our context of comparing algorithms between 2 languages, the normalization algorithms were not trained on data other than the UMLS Metathesaurus. MedCAT&#x2019;s performance (shown in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) cannot be directly compared with that of other models, as this method performed both NER and normalization in a single step. However, we note that this algorithm performed as well as axis 2.1 in terms of overall performance, as shown in <xref ref-type="table" rid="table4">Table 4</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Performance of the normalization step. 
Model results were calculated from the annotated data sets, focusing on the 4 semantic groups of interest: <italic>Chemical &#x0026; Drugs</italic>, <italic>Devices</italic>, <italic>Disorders</italic>, and <italic>Procedures</italic>. The best performance values are italicized.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Algorithms</td><td align="left" valign="bottom" colspan="3">Data set models</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">EMEA test</td><td align="left" valign="bottom">French notes</td><td align="left" valign="bottom">N2C2<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> 2019 test</td></tr></thead><tbody><tr><td align="left" valign="top">Deep multilingual normalization</td><td align="left" valign="top"><italic>0.65</italic></td><td align="left" valign="top"><italic>0.57</italic></td><td align="left" valign="top"><italic>0.74</italic></td></tr><tr><td align="left" valign="top">CODER all</td><td align="left" valign="top">0.58</td><td align="left" valign="top">0.51</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td></tr><tr><td align="left" valign="top">CODER en</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.63</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>N2C2: National Natural Language Processing Clinical Challenges.</p></fn><fn id="table3fn2"><p><sup>b</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Overall performances. The normalization step was performed by the deep multilingual model and the translation was performed by the OPUS-MT-FR-EN FT model. 
The best performance values are italicized.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Methods</td><td align="left" valign="bottom" colspan="3">EMEA test</td><td align="left" valign="bottom" colspan="3">French notes</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score (95% CI)</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">Axis 1 (French NER<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup>+normalization)</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.60</td><td align="left" valign="top"><italic>0.61 (0.53-0.65)</italic></td><td align="left" valign="top">0.49</td><td align="left" valign="top">0.53</td><td align="left" valign="top"><italic>0.51 (0.47-0.55)</italic></td></tr><tr><td align="left" valign="top">Axis 2.1 (Translation+NER+normalization)</td><td align="left" valign="top">0.53</td><td align="left" valign="top">0.40</td><td align="left" valign="top">0.45 (0.38-0.51)</td><td align="left" valign="top">0.41</td><td align="left" valign="top">0.38</td><td align="left" valign="top">0.39 (0.34-0.44)</td></tr><tr><td align="left" valign="top">Axis 2.2 (Translation+MedCAT [<xref ref-type="bibr" rid="ref18">18</xref>])</td><td align="left" valign="top">0.53</td><td align="left" valign="top">0.46</td><td align="left" valign="top">0.49 (0.38-0.54)</td><td align="left" valign="top">0.38</td><td align="left" valign="top">0.38</td><td align="left" valign="top">0.38 (0.36-0.40)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>NER: named entity recognition.</p></fn></table-wrap-foot></table-wrap></sec><sec 
id="s3-3"><title>Translation Performances</title><p>For both translation models, the respective BLEU scores [<xref ref-type="bibr" rid="ref39">39</xref>] were calculated on the shared 2016 Biomedical Translation Task [<xref ref-type="bibr" rid="ref27">27</xref>]. The chosen BLEU algorithm was the weighted geometric mean of the n-gram precisions per sentence.</p><p>A fine-tuned version of OPUS-MT-FR-EN [<xref ref-type="bibr" rid="ref16">16</xref>] was also tested on the 2016 and 2019 Biomedical Translation shared tasks. For fine-tuning, we used the following hyperparameters: a maximum sequence length of 128 (mainly for computational memory reasons), a learning rate of 2 &#x00D7; 10<sup>&#x2013;5</sup>, and a weight decay of 0.01, and we varied the number of epochs up to 15 epochs (the error function curve stops decaying after 10 epochs). The Google Translate model could not be used for our clinical score experiments for reasons of confidentiality.</p><p><xref ref-type="table" rid="table5">Table 5</xref> presents the BLEU scores for the 3 models, showing that fine-tuning the OPUS-MT-FR-EN model [<xref ref-type="bibr" rid="ref16">16</xref>] on biomedical data sets gave the best results, with a BLEU score [<xref ref-type="bibr" rid="ref39">39</xref>] of 0.51. This was the model used to calculate the overall performance of axes 2.1 and 2.2.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Translation performances: BLEU scores of the translation models. 
The best performance value is italicized.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Models</td><td align="left" valign="bottom">WMT<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup> Biomed 2016 test</td></tr></thead><tbody><tr><td align="left" valign="top">Google Translate</td><td align="left" valign="top">0.42</td></tr><tr><td align="left" valign="top">OPUS-MT-FR-EN</td><td align="left" valign="top">0.31</td></tr><tr><td align="left" valign="top">OPUS-MT-FR-EN FT<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td><td align="left" valign="top"><italic>0.51</italic></td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>WMT: Workshop on Machine Translation.</p></fn><fn id="table5fn2"><p><sup>b</sup>OPUS-MT-FR-EN FT corresponds to the OPUS-MT-FR-EN model [<xref ref-type="bibr" rid="ref16">16</xref>] <italic>fine-tuned</italic> on biomedical translated corpus from the WMT Biomedical Translation Tasks in 2016 [<xref ref-type="bibr" rid="ref27">27</xref>] and 2019 [<xref ref-type="bibr" rid="ref28">28</xref>].</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-4"><title>Overall Performances From Raw Text to CUI Predictions</title><p>This section presents the overall performance of the 3 axes, in an end-to-end pipeline. For axis 2, the results are those obtained with the best normalization algorithm (presented in <xref ref-type="table" rid="table3">Table 3</xref>). The model used for translation is the OPUS-MT-FR-EN [<xref ref-type="bibr" rid="ref16">16</xref>] fine-tuned model. The results are presented in <xref ref-type="table" rid="table4">Table 4</xref>, with the best results obtained by the native French approach on the EMEA corpus [<xref ref-type="bibr" rid="ref25">25</xref>] and French clinical notes. 
The 95% CIs were calculated using the empirical bootstrap method [<xref ref-type="bibr" rid="ref40">40</xref>].</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this paper, we compared 2 approaches for extracting medical concepts from clinical notes: a French approach based on a French language model and a translated English approach, where we compared 2 state-of-the-art English biomedical language models, after a translation step. The main advantages of our experiment are that it is reproducible and that we were able to analyze the performance of each step of the algorithm: NER, normalization, and translation, and to test several models for each step.</p></sec><sec id="s4-2"><title>The Quality of the Translation Is Not Sufficient</title><p>We showed that the native French approach outperformed the 2 translated English approaches, even with a small French training data set. This analysis confirms that, where possible, an annotated data set improves feature extraction. The evaluation of each intermediate step showed that the performance of each module was similar in French and English. We can therefore conclude that it is rather the translation phase itself that is of insufficient quality to allow the use of English as a proxy without a loss of performance. 
This is confirmed by the translation performance calculations, where the calculated BLEU scores were relatively low, although improved by a fine-tuning step.</p><p>In conclusion, although translation is commonly used for entity extraction or term normalization in languages other than English [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref43">43</xref>], due to the availability of turnkey models that do not require additional annotation by a clinician, we showed that this induces a significant performance loss.</p><p>Commercial application programming interface&#x2013;based translation services could not be used for our task due to data confidentiality issues. However, the OPUS-MT model is considered state of the art, it is adjustable to domain-specific data, and the translation results presented in <xref ref-type="table" rid="table5">Table 5</xref> confirm the absence of performance difference between this model and the Google Translate model.</p><p>Although our experiments were carried out on a single language, the French-English pair is one of the best performers in recent translation benchmarks [<xref ref-type="bibr" rid="ref16">16</xref>]. Other languages are unlikely to produce significantly better results.</p></sec><sec id="s4-3"><title>Error Analysis</title><p>In these experiments, the overall results may appear low, but the task is still complex, especially because the UMLS Metathesaurus [<xref ref-type="bibr" rid="ref1">1</xref>] contains many synonyms with different CUIs. To better understand this, we performed an error analysis on the normalization task only, as shown in Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, with a physician&#x2019;s evaluation, on a sample of 100 errors for both models. 
We calculated that 24% (24/100) and 39% (39/100) of the terms found by the deep normalization algorithm [<xref ref-type="bibr" rid="ref14">14</xref>] and CODER [<xref ref-type="bibr" rid="ref15">15</xref>], respectively, were in fact synonyms but had 2 different UMLS CUIs. This highlights the difficulty of achieving normalization on the UMLS Metathesaurus. The UMLS Metathesaurus indeed groups together numerous terminologies whose mapping between terms is often imperfect, implying that certain synonyms, as shown here, do not have the same CUI, as pointed out by Cimino [<xref ref-type="bibr" rid="ref44">44</xref>] and Jim&#x00E9;nez-Ruiz et al [<xref ref-type="bibr" rid="ref45">45</xref>]. For example, &#x201C;cardiac ultrasound&#x201D; has the CUI of C1655737, whereas &#x201C;echocardiography&#x201D; has another CUI of C0013516; similarly, &#x201C;H/O: thromboembolism&#x201D; has a CUI of C0455533, whereas &#x201C;history of thromboembolism&#x201D; has a CUI of C1997787, and so on.</p><p>Moreover, to be more precise, each axis had its own errors: overall, the errors in axis 2 were essentially due to the loss of information in translation. One notable error was literal translation: for example, &#x201C;dispersed lupus erythematous&#x201D; instead of &#x201C;systemic lupus erythematosus,&#x201D; or &#x201C;crepitant&#x201D; instead of &#x201C;crackles.&#x201D; This loss of translation led to more errors in the extraction of named entities.</p><p>In addition to the loss of translation information, axis 2.1 was also penalized by the NER step, due to the difference between the training set (N2C2 notes) and the test set (the translated French notes; the aim being to compare the performance of English-language turnkey models with the performance of French-language models from an annotated set). Axis 2.1, for example, omitted the names of certain drugs more often.</p><p>Finally, both axes were penalized by abbreviations. 
These were often badly translated (for example, the abbreviation &#x201C;MFIU&#x201D; for &#x201C;mort foetale in utero,&#x201D; meaning &#x201C;intrauterine fetal death,&#x201D; was not translated), which penalized axis 2. Nevertheless, if they were indeed extracted by NER steps in axis 1, they were not correctly normalized due to the absence of a corresponding CUI in the UMLS Metathesaurus.</p></sec><sec id="s4-4"><title>Limitations</title><p>This work has several limitations. First, the actual French clinical notes contained very few terms in the <italic>Devices</italic> semantic group, which prevented the NER algorithm from finding them in the test data set. However, this drawback, which penalized the native French approach, still allowed us to draw a conclusion for the results. Furthermore, in this study, we did not take into account attributes of the extracted terms such as negation, hypothetical attribute, or belonging to a person other than the patient for comparison purposes, as the QUAERO [<xref ref-type="bibr" rid="ref25">25</xref>] and N2C2 2019 [<xref ref-type="bibr" rid="ref24">24</xref>] data sets did not have this labeled information.</p></sec></sec></body><back><ack><p>The authors would like to thank the Assistance Publique H&#x00F4;pitaux de Paris (AP-HP) data warehouse, which provided the data and the computing power to carry out this study under good conditions. We wish to thank all the medical colleges, including internal medicine, rheumatology, dermatology, nephrology, pneumology, hepato-gastroenterology, hematology, endocrinology, gynecology, infectiology, cardiology, oncology, emergency, and intensive care units, that gave their permission for the use of the clinical data.</p></ack><notes><sec><title>Data Availability</title><p>The data sets analyzed as part of this study are not accessible to the public due to the confidentiality of data from patient files, even after deidentification. 
However, access to raw data from the Assistance Publique H&#x00F4;pitaux de Paris (AP-HP) data warehouse can be granted by following the procedure described on its website [<xref ref-type="bibr" rid="ref46">46</xref>]: by contacting the ethical and scientific committee at secretariat.cse@aphp.fr. Prior validation of access by the local institutional review committee is required. In the case of non-APHP researchers, a collaboration contract must also be signed.</p></sec></notes><fn-group><fn fn-type="con"><p>CG contributed to conceptualization, data curation, formal analysis, investigation, methodology, software, validation, original drafting, writing&#x2014;original version, and writing&#x2014;revision and editing the manuscript. YX contributed to investigation, methodology, software, and validation. PW contributed to investigation, software, and revision of the manuscript. FC contributed to conceptualization, methodology, project administration, supervision, writing&#x2014;original version, and writing&#x2014;revision and editing of the manuscript. 
XT contributed to conceptualization, formal analysis, methodology, writing&#x2014;original version, and writing&#x2014;revision and editing of the manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">BERT</term><def><p>Bidirectional Encoder Representations From Transformers</p></def></def-item><def-item><term id="abb2">CUI</term><def><p>concept unique identifier</p></def></def-item><def-item><term id="abb3">N2C2</term><def><p>National Natural Language Processing Clinical Challenges</p></def></def-item><def-item><term id="abb4">NER</term><def><p>named entity recognition</p></def></def-item><def-item><term id="abb5">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb6">UMLS</term><def><p>Unified Medical Language System</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bodenreider</surname><given-names>O</given-names></name></person-group><article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title><source>Nucleic Acids Res</source><year>2004</year><month>01</month><day>1</day><volume>32</volume><issue>suppl 1</issue><fpage>D267</fpage><lpage>D270</lpage><pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id><pub-id pub-id-type="medline">14681409</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Vaswani</surname><given-names>A</given-names></name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names></name><name name-style="western"><surname>Parmar</surname><given-names>N</given-names></name><etal/></person-group><person-group person-group-type="editor"><name 
name-style="western"><surname>Guyon</surname><given-names>I</given-names></name><name name-style="western"><surname>von Luxburg</surname><given-names>U</given-names></name><name name-style="western"><surname>Bengio</surname><given-names>S</given-names></name><etal/></person-group><article-title>Attention is all you need</article-title><source>Advances in Neural Information Processing Systems 30 (NIPS 2017)</source><year>2017</year><access-date>2024-03-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://papers.nips.cc/paper_files/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html">https://papers.nips.cc/paper_files/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names></name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names></name><name name-style="western"><surname>Lee</surname><given-names>K</given-names></name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Burstein</surname><given-names>J</given-names></name><name name-style="western"><surname>Doran</surname><given-names>C</given-names></name><name name-style="western"><surname>Solorio</surname><given-names>T</given-names></name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source><year>2019</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>4171</fpage><lpage>4186</lpage><pub-id 
pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>N&#x00E9;v&#x00E9;ol</surname><given-names>A</given-names></name><name name-style="western"><surname>Dalianis</surname><given-names>H</given-names></name><name name-style="western"><surname>Velupillai</surname><given-names>S</given-names></name><name name-style="western"><surname>Savova</surname><given-names>G</given-names></name><name name-style="western"><surname>Zweigenbaum</surname><given-names>P</given-names></name></person-group><article-title>Clinical natural language processing in languages other than English: opportunities and challenges</article-title><source>J Biomed Semantics</source><year>2018</year><month>03</month><day>30</day><volume>9</volume><issue>1</issue><fpage>12</fpage><pub-id pub-id-type="doi">10.1186/s13326-018-0179-8</pub-id><pub-id pub-id-type="medline">29602312</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>van Mulligen</surname><given-names>EM</given-names></name><name name-style="western"><surname>Afzal</surname><given-names>Z</given-names></name><name name-style="western"><surname>Akhondi</surname><given-names>SA</given-names></name><name name-style="western"><surname>Vo</surname><given-names>D</given-names></name><name name-style="western"><surname>Kors</surname><given-names>JA</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Balog</surname><given-names>K</given-names></name><name name-style="western"><surname>Cappellato</surname><given-names>L</given-names></name><name name-style="western"><surname>Ferro</surname><given-names>N</given-names></name><name 
name-style="western"><surname>Macdonald</surname><given-names>C</given-names></name></person-group><article-title>Erasmus MC at CLEF Ehealth 2016: concept recognition and coding in French texts</article-title><source>Working Notes of CLEF 2016 - Conference and Labs of the Evaluation Forum CEUR Workshop Proceedings, Vol 1609</source><year>2016</year><access-date>2024-03-15</access-date><publisher-name>CEUR-WS.org</publisher-name><fpage>171</fpage><lpage>178</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://ceur-ws.org/Vol-1609/16090171.pdf">https://ceur-ws.org/Vol-1609/16090171.pdf</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>Q</given-names></name><name name-style="western"><surname>Vogel</surname><given-names>S</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Cohen</surname><given-names>KB</given-names></name><name name-style="western"><surname>Carpenter</surname><given-names>B</given-names></name></person-group><article-title>Parallel Implementations of word alignment tool</article-title><source>SETQA-NLP &#x2019;08: Software Engineering, Testing, and Quality Assurance for Natural Language Processing</source><year>2008</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>49</fpage><lpage>57</lpage><pub-id pub-id-type="doi">10.5555/1622110.1622119</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Vogel</surname><given-names>S</given-names></name><name name-style="western"><surname>Ney</surname><given-names>H</given-names></name><name name-style="western"><surname>Tillmann</surname><given-names>C</given-names></name></person-group><article-title>HMM-based word alignment in 
statistical translation</article-title><source>COLING &#x2019;96: Proceedings of the 16th Conference on Computational Linguistics - Volume 2</source><year>1996</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>836</fpage><lpage>841</lpage><pub-id pub-id-type="doi">10.3115/993268.993313</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><article-title>ChristelDG/biomed_translation</article-title><source>GitHub</source><access-date>2024-03-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/ChristelDG/biomed_translation">https://github.com/ChristelDG/biomed_translation</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Johnson</surname><given-names>AEW</given-names></name><name name-style="western"><surname>Pollard</surname><given-names>TJ</given-names></name><name name-style="western"><surname>Shen</surname><given-names>L</given-names></name><etal/></person-group><article-title>MIMIC-III, a freely accessible critical care database</article-title><source>Sci Data</source><year>2016</year><month>05</month><day>24</day><volume>3</volume><issue>1</issue><fpage>160035</fpage><pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id><pub-id pub-id-type="medline">27219127</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>J</given-names></name><name name-style="western"><surname>Yoon</surname><given-names>W</given-names></name><name name-style="western"><surname>Kim</surname><given-names>S</given-names></name><etal/></person-group><article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text 
mining</article-title><source>Bioinformatics</source><year>2020</year><month>02</month><day>15</day><volume>36</volume><issue>4</issue><fpage>1234</fpage><lpage>1240</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id><pub-id pub-id-type="medline">31501885</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>K</given-names></name><name name-style="western"><surname>Altosaar</surname><given-names>J</given-names></name><name name-style="western"><surname>Ranganath</surname><given-names>R</given-names></name></person-group><article-title>ClinicalBERT: modeling clinical notes and predicting hospital readmission</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 10, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1904.05342</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Martin</surname><given-names>L</given-names></name><name name-style="western"><surname>Muller</surname><given-names>B</given-names></name><name name-style="western"><surname>Ortiz Su&#x00E1;rez</surname><given-names>PJ</given-names></name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Jurafsky</surname><given-names>D</given-names></name><name name-style="western"><surname>Chai</surname><given-names>J</given-names></name><name name-style="western"><surname>Schluter</surname><given-names>N</given-names></name><name name-style="western"><surname>Tetreault</surname><given-names>J</given-names></name></person-group><article-title>CamemBERT: a tasty French language model</article-title><source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source><year>2020</year><publisher-name>Association for 
Computational Linguistics</publisher-name><fpage>7203</fpage><lpage>7219</lpage><pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.645</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Le</surname><given-names>H</given-names></name><name name-style="western"><surname>Vial</surname><given-names>L</given-names></name><name name-style="western"><surname>Frej</surname><given-names>J</given-names></name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Calzolari</surname><given-names>N</given-names></name><name name-style="western"><surname>B&#x00E9;chet</surname><given-names>F</given-names></name><name name-style="western"><surname>Blache</surname><given-names>P</given-names></name><etal/></person-group><article-title>FlauBERT: unsupervised language model pre-training for French</article-title><source>Proceedings of the Twelfth Language Resources and Evaluation Conference</source><year>2020</year><access-date>2024-03-15</access-date><publisher-name>European Language Resources Association</publisher-name><fpage>2479</fpage><lpage>2490</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2020.lrec-1.302">https://aclanthology.org/2020.lrec-1.302</ext-link></comment></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wajsb&#x00FC;rt</surname><given-names>P</given-names></name><name name-style="western"><surname>Sarfati</surname><given-names>A</given-names></name><name name-style="western"><surname>Tannier</surname><given-names>X</given-names></name></person-group><article-title>Medical concept normalization in French using multilingual terminologies and contextual embeddings</article-title><source>J Biomed 
Inform</source><year>2021</year><month>02</month><volume>114</volume><fpage>103684</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2021.103684</pub-id><pub-id pub-id-type="medline">33450387</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yuan</surname><given-names>Z</given-names></name><name name-style="western"><surname>Zhao</surname><given-names>Z</given-names></name><name name-style="western"><surname>Sun</surname><given-names>H</given-names></name><name name-style="western"><surname>Li</surname><given-names>J</given-names></name><name name-style="western"><surname>Wang</surname><given-names>F</given-names></name><name name-style="western"><surname>Yu</surname><given-names>S</given-names></name></person-group><article-title>CODER: knowledge-infused cross-lingual medical term embedding for term normalization</article-title><source>J Biomed Inform</source><year>2022</year><month>02</month><volume>126</volume><fpage>103983</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2021.103983</pub-id><pub-id pub-id-type="medline">34990838</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Tiedemann</surname><given-names>J</given-names></name><name name-style="western"><surname>Thottingal</surname><given-names>S</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Martins</surname><given-names>A</given-names></name><name name-style="western"><surname>Moniz</surname><given-names>H</given-names></name><name name-style="western"><surname>Fumega</surname><given-names>S</given-names></name><etal/></person-group><article-title>OPUS-MT - building open translation services for the world</article-title><source>Proceedings of the 22nd Annual Conference of the European Association for 
Machine Translation</source><year>2020</year><access-date>2024-03-15</access-date><publisher-name>European Association for Machine Translation</publisher-name><fpage>479</fpage><lpage>480</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2020.eamt-1.61">https://aclanthology.org/2020.eamt-1.61</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names></name><name name-style="western"><surname>Chen</surname><given-names>Q</given-names></name><name name-style="western"><surname>Yang</surname><given-names>Z</given-names></name><name name-style="western"><surname>Lin</surname><given-names>H</given-names></name><name name-style="western"><surname>Lu</surname><given-names>Z</given-names></name></person-group><article-title>BioWordVec, improving biomedical word embeddings with subword information and MeSH</article-title><source>Sci Data</source><year>2019</year><month>05</month><day>10</day><volume>6</volume><issue>1</issue><fpage>52</fpage><pub-id pub-id-type="doi">10.1038/s41597-019-0055-0</pub-id><pub-id pub-id-type="medline">31076572</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Kraljevic</surname><given-names>Z</given-names></name><name name-style="western"><surname>Bean</surname><given-names>D</given-names></name><name name-style="western"><surname>Mascio</surname><given-names>A</given-names></name><etal/></person-group><article-title>MedCAT -- medical concept annotation tool</article-title><source>arXiv</source><comment>Preprint posted online on  Dec 18, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1912.10166</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Campos</surname><given-names>L</given-names></name><name name-style="western"><surname>Pedro</surname><given-names>V</given-names></name><name name-style="western"><surname>Couto</surname><given-names>F</given-names></name></person-group><article-title>Impact of translation on named-entity recognition in radiology texts</article-title><source>Database (Oxford)</source><year>2017</year><month>01</month><day>1</day><volume>2017</volume><issue>2017</issue><fpage>bax064</fpage><pub-id pub-id-type="doi">10.1093/database/bax064</pub-id><pub-id pub-id-type="medline">29220455</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Suarez-Paniagua</surname><given-names>V</given-names></name><name name-style="western"><surname>Dong</surname><given-names>H</given-names></name><name name-style="western"><surname>Casey</surname><given-names>A</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Faggioli</surname><given-names>G</given-names></name><name name-style="western"><surname>Ferro</surname><given-names>N</given-names></name><name name-style="western"><surname>Joly</surname><given-names>A</given-names></name><name name-style="western"><surname>Maistro</surname><given-names>M</given-names></name><name name-style="western"><surname>Piroi</surname><given-names>F</given-names></name></person-group><article-title>A multi-BERT hybrid system for named entity recognition in Spanish radiology reports</article-title><source>Proceedings of the Working Notes of CLEF 2021 - Conference and Labs of the Evaluation Forum. 
CEUR Workshop Proceedings, Vol 2936</source><year>2021</year><access-date>2024-03-15</access-date><publisher-name>CEUR-WS.org</publisher-name><fpage>846</fpage><lpage>856</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://ceur-ws.org/Vol-2936/paper-70.pdf">https://ceur-ws.org/Vol-2936/paper-70.pdf</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Perez-Miguel</surname><given-names>N</given-names></name><name name-style="western"><surname>Cuadros</surname><given-names>M</given-names></name><name name-style="western"><surname>Rigau</surname><given-names>G</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Calzolari</surname><given-names>N</given-names></name><name name-style="western"><surname>Choukri</surname><given-names>K</given-names></name><name name-style="western"><surname>Cieri</surname><given-names>C</given-names></name><etal/></person-group><article-title>Biomedical term normalization of EHRs with UMLS</article-title><source>Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)</source><year>2018</year><access-date>2024-03-15</access-date><publisher-name>European Language Resources Association (ELRA)</publisher-name><fpage>2045</fpage><lpage>2051</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/L18-1322">https://aclanthology.org/L18-1322</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>Y</given-names></name><name name-style="western"><surname>Zong</surname><given-names>C</given-names></name><name name-style="western"><surname>Su</surname><given-names>KYS</given-names></name></person-group><person-group 
person-group-type="editor"><name name-style="western"><surname>Haji&#x010D;</surname><given-names>J</given-names></name><name name-style="western"><surname>Carberry</surname><given-names>S</given-names></name><name name-style="western"><surname>Clark</surname><given-names>S</given-names></name><name name-style="western"><surname>Nivre</surname><given-names>J</given-names></name></person-group><article-title>On jointly recognizing and aligning bilingual named entities</article-title><source>Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics</source><year>2010</year><access-date>2024-03-15</access-date><publisher-name>Association for Computational Linguistics</publisher-name><fpage>631</fpage><lpage>639</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/P10-1065">https://aclanthology.org/P10-1065</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>Y</given-names></name><name name-style="western"><surname>Zong</surname><given-names>C</given-names></name><name name-style="western"><surname>Su</surname><given-names>KYS</given-names></name></person-group><article-title>A joint model to identify and align bilingual named entities</article-title><source>Comput Linguist</source><year>2013</year><month>06</month><day>1</day><volume>39</volume><issue>2</issue><fpage>229</fpage><lpage>266</lpage><pub-id pub-id-type="doi">10.1162/COLI_a_00122</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Henry</surname><given-names>S</given-names></name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names></name><name name-style="western"><surname>Shen</surname><given-names>F</given-names></name><name 
name-style="western"><surname>Uzuner</surname><given-names>O</given-names></name></person-group><article-title>The 2019 National Natural Language Processing (NLP) Clinical Challenges (N2C2)/Open Health NLP (OHNLP) shared task on clinical concept normalization for clinical records</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>10</month><day>1</day><volume>27</volume><issue>10</issue><fpage>1529</fpage><lpage>1537</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaa106</pub-id><pub-id pub-id-type="medline">32968800</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>N&#x00E9;v&#x00E9;ol</surname><given-names>A</given-names></name><name name-style="western"><surname>Grouin</surname><given-names>C</given-names></name><name name-style="western"><surname>Leixa</surname><given-names>J</given-names></name><name name-style="western"><surname>Rosset</surname><given-names>S</given-names></name><name name-style="western"><surname>Zweigenbaum</surname><given-names>P</given-names></name></person-group><article-title>The QUAERO French medical corpus: a resource for medical entity recognition and normalization</article-title><access-date>2024-03-15</access-date><conf-name>Fourth Workshop on Building and Evaluating Resources for Health and Biomedical Text Processing - BioTextM2014</conf-name><conf-date>May 26-31, 2014</conf-date><conf-loc>Reykjavik, Iceland</conf-loc><fpage>24</fpage><lpage>30</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://perso.limsi.fr/pz/FTPapiers/Neveol_BIOTEXTM2014.pdf">https://perso.limsi.fr/pz/FTPapiers/Neveol_BIOTEXTM2014.pdf</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kors</surname><given-names>JA</given-names></name><name 
name-style="western"><surname>Clematide</surname><given-names>S</given-names></name><name name-style="western"><surname>Akhondi</surname><given-names>SA</given-names></name><name name-style="western"><surname>van Mulligen</surname><given-names>EM</given-names></name><name name-style="western"><surname>Rebholz-Schuhmann</surname><given-names>D</given-names></name></person-group><article-title>A multilingual gold-standard corpus for biomedical concept recognition: the Mantra GSC</article-title><source>J Am Med Inform Assoc</source><year>2015</year><month>09</month><volume>22</volume><issue>5</issue><fpage>948</fpage><lpage>956</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocv037</pub-id><pub-id pub-id-type="medline">25948699</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Bojar</surname><given-names>O</given-names></name><name name-style="western"><surname>Chatterjee</surname><given-names>R</given-names></name><name name-style="western"><surname>Federmann</surname><given-names>C</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Bojar</surname><given-names>O</given-names></name><name name-style="western"><surname>Buck</surname><given-names>C</given-names></name><name name-style="western"><surname>Chatterjee</surname><given-names>R</given-names></name><etal/></person-group><article-title>Findings of the 2016 Conference on Machine Translation</article-title><source>Proceedings of the First Conference on Machine Translation: Volume 2, Shared Task Papers</source><year>2016</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>131</fpage><lpage>198</lpage><pub-id pub-id-type="doi">10.18653/v1/W16-2301</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name 
name-style="western"><surname>Bawden</surname><given-names>R</given-names></name><name name-style="western"><surname>Bretonnel Cohen</surname><given-names>K</given-names></name><name name-style="western"><surname>Grozea</surname><given-names>C</given-names></name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Bojar</surname><given-names>O</given-names></name><name name-style="western"><surname>Chatterjee</surname><given-names>R</given-names></name><name name-style="western"><surname>Federmann</surname><given-names>C</given-names></name><etal/></person-group><article-title>Findings of the WMT 2019 Biomedical Translation Shared Task: evaluation for MEDLINE abstracts and biomedical terminologies</article-title><source>Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)</source><year>2019</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>29</fpage><lpage>53</lpage><pub-id pub-id-type="doi">10.18653/v1/W19-5403</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="thesis"><person-group person-group-type="author"><name name-style="western"><surname>Wajsb&#x00FC;rt</surname><given-names>P</given-names></name></person-group><source>Extraction and Normalization of Simple and Structured Entities in Medical Documents [thesis]</source><year>2021</year><month>12</month><access-date>2024-03-15</access-date><publisher-name>Sorbonne Universit&#x00E9;</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://theses.hal.science/THESES-SU/tel-03624928v1">https://theses.hal.science/THESES-SU/tel-03624928v1</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>G&#x00E9;rardin</surname><given-names>C</given-names></name><name 
name-style="western"><surname>Wajsb&#x00FC;rt</surname><given-names>P</given-names></name><name name-style="western"><surname>Vaillant</surname><given-names>P</given-names></name><name name-style="western"><surname>Bellamine</surname><given-names>A</given-names></name><name name-style="western"><surname>Carrat</surname><given-names>F</given-names></name><name name-style="western"><surname>Tannier</surname><given-names>X</given-names></name></person-group><article-title>Multilabel classification of medical concepts for patient clinical profile identification</article-title><source>Artif Intell Med</source><year>2022</year><month>06</month><volume>128</volume><fpage>102311</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2022.102311</pub-id><pub-id pub-id-type="medline">35534148</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Lample</surname><given-names>G</given-names></name><name name-style="western"><surname>Ballesteros</surname><given-names>M</given-names></name><name name-style="western"><surname>Subramanian</surname><given-names>S</given-names></name><name name-style="western"><surname>Kawakami</surname><given-names>K</given-names></name><name name-style="western"><surname>Dyer</surname><given-names>C</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Knight</surname><given-names>K</given-names></name><name name-style="western"><surname>Nenkova</surname><given-names>A</given-names></name><name name-style="western"><surname>Rambow</surname><given-names>O</given-names></name></person-group><article-title>Neural architectures for named entity recognition</article-title><source>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source><year>2016</year><publisher-name>Association for 
Computational Linguistics</publisher-name><fpage>260</fpage><lpage>270</lpage><pub-id pub-id-type="doi">10.18653/v1/N16-1030</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hochreiter</surname><given-names>S</given-names></name><name name-style="western"><surname>Schmidhuber</surname><given-names>J</given-names></name></person-group><article-title>Long short-term memory</article-title><source>Neural Comput</source><year>1997</year><month>11</month><day>15</day><volume>9</volume><issue>8</issue><fpage>1735</fpage><lpage>1780</lpage><pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id><pub-id pub-id-type="medline">9377276</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>J</given-names></name><name name-style="western"><surname>El-Khamy</surname><given-names>M</given-names></name><name name-style="western"><surname>Lee</surname><given-names>J</given-names></name></person-group><article-title>Residual LSTM: design of a deep recurrent architecture for distant speech recognition</article-title><year>2017</year><conf-name>Interspeech 2017</conf-name><conf-date>Aug 20-24, 2017</conf-date><conf-loc>Stockholm, Sweden</conf-loc><fpage>1591</fpage><lpage>1595</lpage><pub-id pub-id-type="doi">10.21437/Interspeech.2017-477</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>J</given-names></name><name name-style="western"><surname>Bohnet</surname><given-names>B</given-names></name><name name-style="western"><surname>Poesio</surname><given-names>M</given-names></name></person-group><person-group person-group-type="editor"><name 
name-style="western"><surname>Jurafsky</surname><given-names>D</given-names></name><name name-style="western"><surname>Chai</surname><given-names>J</given-names></name><name name-style="western"><surname>Schluter</surname><given-names>N</given-names></name><name name-style="western"><surname>Tetreault</surname><given-names>J</given-names></name></person-group><article-title>Named entity recognition as dependency parsing</article-title><source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source><year>2020</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>6470</fpage><lpage>6476</lpage><pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.577</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Kingma</surname><given-names>DP</given-names></name><name name-style="western"><surname>Ba</surname><given-names>J</given-names></name></person-group><article-title>Adam: a method for stochastic optimization</article-title><source>arXiv</source><comment>Preprint posted online on  Dec 22, 2014</comment><pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bojanowski</surname><given-names>P</given-names></name><name name-style="western"><surname>Grave</surname><given-names>E</given-names></name><name name-style="western"><surname>Joulin</surname><given-names>A</given-names></name><name name-style="western"><surname>Mikolov</surname><given-names>T</given-names></name></person-group><article-title>Enriching word vectors with subword information</article-title><source>Trans Assoc Comput Linguist</source><year>2017</year><month>12</month><day>1</day><volume>5</volume><fpage>135</fpage><lpage>146</lpage><pub-id 
pub-id-type="doi">10.1162/tacl_a_00051</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>X</given-names></name><name name-style="western"><surname>Han</surname><given-names>X</given-names></name><name name-style="western"><surname>Huang</surname><given-names>W</given-names></name><name name-style="western"><surname>Dong</surname><given-names>D</given-names></name><name name-style="western"><surname>Scott</surname><given-names>MR</given-names></name></person-group><article-title>Multi-similarity loss with general pair weighting for deep metric learning</article-title><year>2019</year><conf-name>2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name><conf-date>Jun 15-20, 2019</conf-date><conf-loc>Long Beach, CA</conf-loc><fpage>5017</fpage><lpage>5025</lpage><pub-id pub-id-type="doi">10.1109/CVPR.2019.00516</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="web"><source>CNIL (Commission Nationale de l&#x2019;Informatique et des Libert&#x00E9;s)</source><access-date>2024-03-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cnil.fr/en/home">https://www.cnil.fr/en/home</ext-link></comment></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Papineni</surname><given-names>K</given-names></name><name name-style="western"><surname>Roukos</surname><given-names>S</given-names></name><name name-style="western"><surname>Ward</surname><given-names>T</given-names></name><name name-style="western"><surname>Zhu</surname><given-names>W-J</given-names></name></person-group><article-title>BLEU: a method for automatic evaluation of machine translation</article-title><source>ACL &#x2019;02: Proceedings of the 40th Annual 
Meeting on Association for Computational Linguistics</source><year>2002</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>311</fpage><lpage>318</lpage><pub-id pub-id-type="doi">10.3115/1073083.1073135</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Dekking</surname><given-names>FM</given-names></name><name name-style="western"><surname>Kraaikamp</surname><given-names>C</given-names></name><name name-style="western"><surname>Lopuhaa</surname><given-names>HP</given-names></name><name name-style="western"><surname>Meester</surname><given-names>LE</given-names></name></person-group><source>A Modern Introduction to Probability and Statistics: Understanding Why and How</source><year>2007</year><publisher-name>Springer Nature</publisher-name></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Cotik</surname><given-names>V</given-names></name><name name-style="western"><surname>Rodr&#x00ED;guez</surname><given-names>H</given-names></name><name name-style="western"><surname>Vivaldi</surname><given-names>J</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Lossio-Ventura</surname><given-names>J</given-names></name><name name-style="western"><surname>Mu&#x00F1;ante</surname><given-names>D</given-names></name><name name-style="western"><surname>Alatrista-Salas</surname><given-names>H</given-names></name></person-group><article-title>Spanish named entity recognition in the biomedical domain</article-title><source>Information Management and Big Data. SIMBig 2018. 
Communications in Computer and Information Science, vol 898</source><publisher-name>Springer</publisher-name><fpage>233</fpage><lpage>248</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-11680-4</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Hellrich</surname><given-names>J</given-names></name><name name-style="western"><surname>Hahn</surname><given-names>U</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>M&#x00E9;tais</surname><given-names>E</given-names></name><name name-style="western"><surname>Roche</surname><given-names>M</given-names></name><name name-style="western"><surname>Teisseire</surname><given-names>M</given-names></name></person-group><article-title>Enhancing multilingual biomedical terminologies via machine translation from parallel corpora</article-title><source>Natural Language Processing and Information Systems. NLDB 2014. 
Lecture Notes in Computer Science, vol 8455</source><year>2014</year><publisher-name>Springer</publisher-name><fpage>9</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.1007/978-3-319-07983-7_2</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Attardi</surname><given-names>G</given-names></name><name name-style="western"><surname>Buzzelli</surname><given-names>A</given-names></name><name name-style="western"><surname>Sartiano</surname><given-names>D</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Forner</surname><given-names>P</given-names></name><name name-style="western"><surname>Navigli</surname><given-names>R</given-names></name><name name-style="western"><surname>Tufis</surname><given-names>D</given-names></name><name name-style="western"><surname>Ferro</surname><given-names>N</given-names></name></person-group><article-title>Machine translation for entity recognition across languages in BIOMEDICAL documents</article-title><source>Working Notes for CLEF 2013 Conference. 
CEUR Workshop Proceedings, Vol 1179</source><year>2013</year><access-date>2024-03-15</access-date><publisher-name>CEUR-WS.org</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://ceur-ws.org/Vol-1179/CLEF2013wn-CLEFER-AttardiEt2013.pdf">https://ceur-ws.org/Vol-1179/CLEF2013wn-CLEFER-AttardiEt2013.pdf</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cimino</surname><given-names>JJ</given-names></name></person-group><article-title>Auditing the Unified Medical Language System with semantic methods</article-title><source>J Am Med Inform Assoc</source><year>1998</year><volume>5</volume><issue>1</issue><fpage>41</fpage><lpage>51</lpage><pub-id pub-id-type="doi">10.1136/jamia.1998.0050041</pub-id><pub-id pub-id-type="medline">9452984</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jim&#x00E9;nez-Ruiz</surname><given-names>E</given-names></name><name name-style="western"><surname>Grau</surname><given-names>BC</given-names></name><name name-style="western"><surname>Horrocks</surname><given-names>I</given-names></name><name name-style="western"><surname>Berlanga</surname><given-names>R</given-names></name></person-group><article-title>Logic-based assessment of the compatibility of UMLS ontology sources</article-title><source>J Biomed Semantics</source><year>2011</year><month>03</month><day>7</day><volume>2 Suppl 1</volume><issue>Suppl 1</issue><fpage>S2</fpage><pub-id pub-id-type="doi">10.1186/2041-1480-2-S1-S2</pub-id><pub-id pub-id-type="medline">21388571</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="web"><source>Assistance Publique H&#x00F4;pitaux de Paris</source><access-date>2024-03-18</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="http://www.eds.aphp.fr">www.eds.aphp.fr</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Detailed description of the data sets, an example of the clinical notes annotation, French corpus annotation, MedCAT performances, and error analysis.</p><media xlink:href="medinform_v12i1e49607_app1.docx" xlink:title="DOCX File, 154 KB"/></supplementary-material></app-group></back></article>