<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i2e30345</article-id>
      <article-id pub-id-type="pmid">35179507</article-id>
      <article-id pub-id-type="doi">10.2196/30345</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Evaluation of Natural Language Processing for the Identification of Crohn Disease–Related Variables in Spanish Electronic Health Records: A Validation Study for the PREMONITION-CD Project</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shung</surname>
            <given-names>Dennis</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>YenPin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sánchez-Laguna</surname>
            <given-names>Francisco José</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Montoto</surname>
            <given-names>Carmen</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <address>
            <institution>Takeda Farmacéutica España S.A.</institution>
            <addr-line>Edificio Torre Europa</addr-line>
            <addr-line>Paseo de la Castellana, 95</addr-line>
            <addr-line>Madrid, 28046</addr-line>
            <country>Spain</country>
            <phone>34 917904222</phone>
            <email>Carmen.montoto@takeda.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3877-9462</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Gisbert</surname>
            <given-names>Javier P</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff02" ref-type="aff">2</xref>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <xref rid="aff05" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2090-3445</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Guerra</surname>
            <given-names>Iván</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff06" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5175-2515</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Plaza</surname>
            <given-names>Rocío</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff07" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0893-4599</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Pajares Villarroya</surname>
            <given-names>Ramón</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff08" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0549-6036</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Moreno Almazán</surname>
            <given-names>Luis</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff09" ref-type="aff">9</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4914-0296</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>López Martín</surname>
            <given-names>María Del Carmen</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff10" ref-type="aff">10</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0517-8110</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Domínguez Antonaya</surname>
            <given-names>Mercedes</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff11" ref-type="aff">11</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0549-3495</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Vera Mendoza</surname>
            <given-names>Isabel</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff12" ref-type="aff">12</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3021-2413</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Aparicio</surname>
            <given-names>Jesús</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4736-7999</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Martínez</surname>
            <given-names>Vicente</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Tagarro</surname>
            <given-names>Ignacio</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8975-0657</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Fernandez-Nistal</surname>
            <given-names>Alonso</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5097-4474</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author">
          <name name-style="western">
            <surname>Canales</surname>
            <given-names>Lea</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff13" ref-type="aff">13</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5018-5400</ext-link>
        </contrib>
        <contrib id="contrib15" contrib-type="author">
          <name name-style="western">
            <surname>Menke</surname>
            <given-names>Sebastian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff14" ref-type="aff">14</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2588-6405</ext-link>
        </contrib>
        <contrib id="contrib16" contrib-type="author">
          <name name-style="western">
            <surname>Gomollón</surname>
            <given-names>Fernando</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff15" ref-type="aff">15</xref>
          <xref rid="aff16" ref-type="aff">16</xref>
          <xref rid="aff17" ref-type="aff">17</xref>
          <xref rid="aff18" ref-type="aff">18</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0076-3529</ext-link>
        </contrib>
        <contrib id="contrib17" contrib-type="author">
          <collab>PREMONITION-CD Study Group</collab>
          <xref rid="aff19" ref-type="aff">19</xref>
        </contrib>
      </contrib-group>
      <aff id="aff01">
        <label>1</label>
        <institution>Takeda Farmacéutica España S.A.</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff02">
        <label>2</label>
        <institution>Hospital Universitario de La Princesa</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff03">
        <label>3</label>
        <institution>Instituto de Investigación Sanitaria Princesa (IIS-IP)</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff04">
        <label>4</label>
        <institution>Universidad Autónoma de Madrid</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff05">
        <label>5</label>
        <institution>Centro de Investigación Biomédica en Red de Enfermedades Hepáticas y Digestivas (CIBEREHD)</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff06">
        <label>6</label>
        <institution>Hospital Universitario de Fuenlabrada</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff07">
        <label>7</label>
        <institution>Hospital Universitario Infanta Leonor</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff08">
        <label>8</label>
        <institution>Hospital Universitario Infanta Sofía</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff09">
        <label>9</label>
        <institution>Hospital Universitario HM Montepríncipe</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff10">
        <label>10</label>
        <institution>Hospital Universitario Infanta Elena</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff11">
        <label>11</label>
        <institution>Hospital Universitario Rey Juan Carlos</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff12">
        <label>12</label>
        <institution>Hospital Universitario Puerta de Hierro Majadahonda</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff13">
        <label>13</label>
        <institution>Department of Software and Computing Systems</institution>
        <institution>University of Alicante</institution>
        <addr-line>Alicante</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff14">
        <label>14</label>
        <institution>MedSavana SL</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff15">
        <label>15</label>
        <institution>Hospital Clínico Universitario Lozano Blesa</institution>
        <addr-line>Zaragoza</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff16">
        <label>16</label>
        <institution>Instituto de Investigación Sanitaria Aragón (IISA)</institution>
        <addr-line>Zaragoza</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff17">
        <label>17</label>
        <institution>Universidad de Zaragoza</institution>
        <addr-line>Zaragoza</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff18">
        <label>18</label>
        <institution>Centro de Investigación Biomédica en Red de Enfermedades Hepáticas y Digestivas (CIBEREHD)</institution>
        <addr-line>Zaragoza</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff19">
        <label>19</label>
        <institution>See Acknowledgements</institution>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Carmen Montoto <email>Carmen.montoto@takeda.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>2</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>2</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>2</issue>
      <elocation-id>e30345</elocation-id>
      <history>
        <date date-type="received">
          <day>11</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>22</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>2</day>
          <month>1</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Carmen Montoto, Javier P Gisbert, Iván Guerra, Rocío Plaza, Ramón Pajares Villarroya, Luis Moreno Almazán, María Del Carmen López Martín, Mercedes Domínguez Antonaya, Isabel Vera Mendoza, Jesús Aparicio, Vicente Martínez, Ignacio Tagarro, Alonso Fernandez-Nistal, Lea Canales, Sebastian Menke, Fernando Gomollón, PREMONITION-CD Study Group. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 18.02.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/2/e30345" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The exploration of clinically relevant information in the free text of electronic health records (EHRs) holds the potential to positively impact clinical practice as well as knowledge regarding Crohn disease (CD), an inflammatory bowel disease that may affect any segment of the gastrointestinal tract. The EHRead technology, a clinical natural language processing (cNLP) system, was designed to detect and extract clinical information from narratives in the clinical notes contained in EHRs.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study is to validate the performance of the EHRead technology in identifying information of patients with CD.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used the EHRead technology to explore and extract CD-related clinical information from EHRs. To validate this tool, we compared the output of the EHRead technology with a manually curated gold standard to assess the quality of our cNLP system in detecting records containing any reference to CD and its related variables.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The validation metrics for the main variable (CD) were a precision of 0.88, a recall of 0.98, and an F1 score of 0.93. Regarding the secondary variables, we obtained a precision of 0.91, a recall of 0.71, and an F1 score of 0.80 for CD flare, while for the variable vedolizumab (treatment), a precision, recall, and F1 score of 0.86, 0.94, and 0.90 were obtained, respectively.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This evaluation demonstrates the ability of the EHRead technology to identify patients with CD and their related variables from the free text of EHRs. To the best of our knowledge, this study is the first to use a cNLP system for the identification of CD in EHRs written in Spanish.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>linguistic validation</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>electronic health records</kwd>
        <kwd>Crohn disease</kwd>
        <kwd>inflammatory bowel disease</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Crohn disease (CD) is a chronic inflammatory bowel disease (IBD) that leads to lesions in different sites along the length of the gastrointestinal tract and, occasionally, in other extraintestinal locations such as skin, eyes, joints, mouth, and the hepatobiliary system [<xref ref-type="bibr" rid="ref1">1</xref>]. Symptoms (including abdominal pain, diarrhea, fever, and weight loss) evolve in a relapsing and remitting manner, leading to bowel damage and disability. CD is considered to be a heterogeneous disorder with a multifactorial etiology, in which genetics and environmental factors interact to manifest the disease [<xref ref-type="bibr" rid="ref2">2</xref>]. Although most patients with CD are diagnosed with an inflammatory phenotype, about half of them require surgery for complications such as strictures, fistulas, or abscesses [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      <p>In recent years, most health care institutions have moved away from paper clinical records toward electronic health records (EHRs) in which patients’ longitudinal medical information is stored [<xref ref-type="bibr" rid="ref4">4</xref>]. Since then, large volumes of digitalized real-world clinical data have been generated at exponential rates. Although some clinical data contained in the EHRs are stored in structured fields, the majority of the relevant clinical information appears embedded in the free-text narratives written down by health professionals [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      <p>The area of computer science dedicated to the analysis and representation of naturally occurring texts (written or oral) [<xref ref-type="bibr" rid="ref6">6</xref>] is called natural language processing (NLP). One of the applications of NLP focuses on the extraction of information from free text captured in EHRs and is therefore referred to as clinical NLP (cNLP). So far, cNLP systems have been successfully applied for the extraction of relevant clinical information using approaches such as regular expressions or machine learning. As a result, the quantity and quality of data captured from the EHRs have substantially increased over recent years [<xref ref-type="bibr" rid="ref7">7</xref>]. Although incorporating information from free text into case detection through NLP techniques improves research quality [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>], one key challenge in this process is to ensure the validity of the results by assessing the detection performance.</p>
      <p>In this context, as part of the PREMONITION-CD observational study, we aimed to assess the performance of the cNLP system <italic>EHRead</italic> technology [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref15">15</xref>] in identifying medical records that contain mentions of CD and its related variables when compared to the detection performed by expert medical doctors. Because the manual review of free-text narratives is extremely time-consuming, valuable information routinely collected in clinical practice has largely remained unused for research purposes. Therefore, the validated automatic extraction of this information holds potential to advance our knowledge about CD and could have a positive impact on the management of these patients [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>].</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethics Approval and Consent to Participate</title>
        <p>This study was conducted within the scope of the PREMONITION-CD project, a multicenter, retrospective study aimed at using NLP to detect free-text information in CD patients’ EHRs. Before the start of data collection, the study was approved by the Spanish Ethics Committee, Agencia Española de Medicamentos y Productos Sanitarios, and the Madrid region Ethics Committee, Comité Ético de Investigación con Medicamentos Regional de la Comunidad de Madrid, with reference number IBD-5002 (May 2018). Approval from each of the hospitals participating in the study was also obtained. It was registered in <italic>ClinicalTrials.gov</italic> with the identifier number NCT03668249.</p>
        <p>The study was conducted in compliance with legal and regulatory requirements and followed generally accepted research practices described in the ICH Guideline for Good Clinical Practice, the Declaration of Helsinki in its latest edition, Good Pharmacoepidemiology Practices, and applicable local regulations.</p>
      </sec>
      <sec>
        <title>Consent for Publication</title>
        <p>In accordance with Article 14.5 of the General Data Protection Regulation (GDPR), if obtaining consent is impossible or would involve a disproportionate effort, in particular for processing for archiving purposes in the public interest, scientific or historical research purposes, or statistical purposes, the study is subject to the conditions and safeguards referred to in Article 89.</p>
        <p>Under Article 89 of the GDPR, processing for public interest or scientific research purposes shall be subject to appropriate safeguards for the rights and freedoms of the data subject, in accordance with the GDPR, and will not require consent from each of the data subjects.</p>
      </sec>
      <sec>
        <title>Availability of Data and Materials</title>
        <p>Due to the retrospective nature of the research, data analysis did not require consent from the data subjects. Therefore, supporting data are subject to strict confidentiality agreements with each participating hospital and cannot be made openly available.</p>
      </sec>
      <sec>
        <title>Data Source</title>
        <p>Data were collected from 8 hospitals of the Spanish National Healthcare Network from January 1, 2014, to December 31, 2018 (except for one participating site with electronic data available from 2013 to 2017).</p>
      </sec>
      <sec>
        <title>Study Design</title>
        <p>For this study, the assessed variables were CD, CD flare (a crucial variable for the characterization of the evolution of the disease), and vedolizumab (a biologic drug indicated exclusively for the treatment of IBD). The variables included in this study were selected by the senior study committee based on the PREMONITION-CD overall study objectives. The variables were detected when written directly in the EHRs, without inferences or prior outcome definitions. The human annotations were used to create a gold standard against which the output of the EHRead technology was compared.</p>
        <p>The EHRead technology is an NLP system designed to retrieve large amounts of biomedical information contained in EHRs [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref15">15</xref>] and convert the information into a structured representation (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <p>To perform this study, we completed the following steps: EHR collection, processing using EHRead technology, creation of the gold standard data set, and comparison of both outputs using standard measures of performance (<xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Extracting and organizing unstructured clinical data into a structured database. The EHRead technology is a clinical NLP system that detects and extracts clinically relevant information contained in deidentified EHRs. The extracted information from participating sites is organized in a structured study database. From this database, patients who fulfill the study inclusion and exclusion criteria make up the target population. In this case, clinical data from the population with a diagnosis of Crohn disease were used. EHR: electronic health record; NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="medinform_v10i2e30345_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Linguistic evaluation process. To validate the output of the EHRead technology, a statistical comparison was performed between its output and a gold standard consisting of a subset of EHRs annotated by expert physicians. The validation metrics calculated are expressed in terms of precision, recall, and F1 score. See text for further details. EHR: electronic health record.</p>
          </caption>
          <graphic xlink:href="medinform_v10i2e30345_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In the <italic>EHR collection step</italic>, a data set was selected that consisted of a sample collection of EHRs obtained primarily from the gastroenterology service (including consultation, hospitalization, and emergency reports), representing more than 3,900,000 patients. To obtain a representative data set, 100 records were randomly selected from each of the 8 sites containing EHRs with and without CD-related information, amounting to a total of 800 clinical documents from 800 patients. Subsequently, all records were fully anonymized to meet legal and ethical requirements before they were annotated by physicians (annotators) to generate a gold standard for each participating site (see sections about annotation process and gold standard).</p>
        <p>In parallel to the annotation task carried out by physicians, the EHRead technology was applied to the free text of the same EHRs used to generate the gold standard (for more details see NLP System). By doing so, the performance of the EHRead technology could directly be compared to human performance in the detection of CD and secondary variables.</p>
        <p>In the final step of the evaluation, the performance of the EHRead technology was compared against the gold standard to validate the capacity of the technology in identifying records containing mentions of CD and its related variables. Therefore, both the detections of physicians and the EHRead technology were transformed into binaries (0=no detection, 1=detection) for each variable to calculate the performance metrics precision, recall, and F1 score using the library scikit-learn [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      </sec>
      <sec>
        <title>NLP System</title>
        <p>The main phases of the NLP system were the following:</p>
        <list list-type="bullet">
          <list-item>
            <p>The section identification phase aims to detect the different parts of a clinical document, such as family medical history, physical exam, and treatment.</p>
          </list-item>
          <list-item>
            <p>The concept identification phase is when the system detects a medical concept. Specifically, the terminology considered by the EHRead technology is built upon SNOMED-CT (Systematized Nomenclature of Medicine–Clinical Terms), a leading platform of systematically organized and computer-readable collections of medical concepts. SNOMED-CT includes codes, concepts, synonyms, and definitions used in clinical documentation and is considered the most comprehensive terminology worldwide.</p>
          </list-item>
          <list-item>
            <p>The contextual information phase focuses on detecting the attributes of the already identified clinical terms within their textual context, both from an intention perspective (the term is either stated in an affirmative way or negated, or is part of a conjecture or opinion) and from a temporal perspective (current or historical).</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Annotation Process and Gold Standard</title>
        <p>The manual revision of clinical texts was carried out by annotators specialized in gastroenterology. For the annotation task, guidelines were jointly created by internal NLP experts and clinical experts. They included the variables to be annotated in the free text, along with recommendations on how to solve uncertainties. Following these guidelines, specialists reviewed the free text of selected EHRs for the occurrence of the study variables to answer a set of yes/no questions: (1) Does/did the patient have CD? (2) Does the report state that the patient has had a flare? and (3) Does the record state that the patient was treated with vedolizumab? The second and third questions were only asked if the first one was affirmative, meaning that the patient did have CD before or at the time point of the hospital visit. The annotators were not allowed to respond with <italic>yes</italic> to any of the questions based on inference.</p>
        <p>Of the 100 records selected per site, 15 were reviewed by two independent annotators to assess the interannotator agreement [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. A low agreement indicates that the annotators had difficulties in linguistically identifying the relevant variables in the EHRs or that the guidelines are still inadequate in properly describing the annotation task [<xref ref-type="bibr" rid="ref21">21</xref>]. Thus, the interannotator agreement serves as a control mechanism to check the reliability of the annotation and further to establish a target of performance for the NLP system. For this task, the annotators were not allowed to communicate with each other or share information regarding the annotation process to avoid bias. Once the annotations were finished, the interannotator agreement was calculated in terms of F1 score. Once the quality of annotations had been verified through the interannotator agreement and the disagreements had been resolved to build the final gold standard, one of the two physicians annotated the remaining 85% of clinical records to complete the gold standard.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>The performance of the EHRead technology in identifying CD and its related variables was compared with the gold standard. The agreement between them was calculated using three metrics: <italic>precision</italic> (ie, positive predictive value), <italic>recall</italic> (ie, sensitivity), and their harmonic mean <italic>F1 score</italic> [<xref ref-type="bibr" rid="ref21">21</xref>]. <italic>Precision</italic> is the indicator of the accuracy of information retrieved by the system, <italic>recall</italic> is the indicator of the amount of information the system retrieves, and <italic>F1 score</italic> conveys the balance between precision and recall. In addition to those metrics, we calculated the 95% CI for each aforementioned measure, since this provides information about the range in which the true value lies and thus how robust the metric is. The method used to calculate the 95% CIs is the Clopper-Pearson approach, one of the most common methods for calculating binomial 95% CIs.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The gold standard data set (N=800) consisted of 41.4% (n=331) medical records with CD, 21.3% (n=170) with CD flare, and 10.4% (n=83) with vedolizumab treatment. <xref ref-type="table" rid="table1">Table 1</xref> shows the interannotator agreement F1 scores of the gold standard for each investigated variable per site.</p>
      <p>The interannotator agreement values were higher than 0.8 for all comparisons, indicating an <italic>almost perfect</italic> agreement according to the Landis and Koch scale [<xref ref-type="bibr" rid="ref19">19</xref>]. In addition, the overall agreement between all sites was <italic>almost perfect</italic> [<xref ref-type="bibr" rid="ref22">22</xref>] for the three studied variables. The EHRead technology results in terms of <italic>precision</italic>, <italic>recall</italic>, and <italic>F1 score</italic> are shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
      <p>The detection of the main variable (ie, CD) achieved a <italic>precision</italic> of 0.88, a <italic>recall</italic> of 0.98, and an <italic>F1 score</italic> of 0.93. Regarding the secondary variables, CD flare obtained a <italic>precision</italic> of 0.91, a <italic>recall</italic> of 0.71, and an <italic>F1 score</italic> of 0.80, while the variable vedolizumab was detected at a <italic>precision</italic> of 0.86, a <italic>recall</italic> of 0.94, and an <italic>F1 score</italic> of 0.90.</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Interannotator agreement (F1 score) per participating site.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="250"/>
          <col width="250"/>
          <col width="250"/>
          <col width="250"/>
          <thead>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="3">F1 score</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Crohn disease</td>
              <td>Crohn disease flare</td>
              <td>Vedolizumab</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Site 1</td>
              <td>0.93</td>
              <td>0.86</td>
              <td>1.00</td>
            </tr>
            <tr valign="top">
              <td>Site 2</td>
              <td>1.00</td>
              <td>0.87</td>
              <td>1.00</td>
            </tr>
            <tr valign="top">
              <td>Site 3</td>
              <td>1.00</td>
              <td>1.00</td>
              <td>1.00</td>
            </tr>
            <tr valign="top">
              <td>Site 4</td>
              <td>0.93</td>
              <td>1.00</td>
              <td>1.00</td>
            </tr>
            <tr valign="top">
              <td>Site 5</td>
              <td>0.93</td>
              <td>0.83</td>
              <td>1.00</td>
            </tr>
            <tr valign="top">
              <td>Site 6</td>
              <td>0.93</td>
              <td>1.00</td>
              <td>1.00</td>
            </tr>
            <tr valign="top">
              <td>Site 7</td>
              <td>1.00</td>
              <td>1.00</td>
              <td>1.00</td>
            </tr>
            <tr valign="top">
              <td>Site 8</td>
              <td>1.00</td>
              <td>0.85</td>
              <td>1.00</td>
            </tr>
            <tr valign="top">
              <td>Average</td>
              <td>0.97</td>
              <td>0.93</td>
              <td>1.00</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Performance of the EHRead technology.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="250"/>
          <col width="250"/>
          <col width="250"/>
          <col width="250"/>
          <thead>
            <tr valign="top">
              <td>Variable</td>
              <td>Precision (95% CI)</td>
              <td>Recall (95% CI)</td>
              <td>F1 score (95% CI)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Crohn disease</td>
              <td>0.88 (0.85-0.91)</td>
              <td>0.98 (0.95-0.99)</td>
              <td>0.93 (0.90-0.95)</td>
            </tr>
            <tr valign="top">
              <td>Crohn disease flare</td>
              <td>0.91 (0.85-0.95)</td>
              <td>0.71 (0.63-0.77)</td>
              <td>0.80 (0.72-0.85)</td>
            </tr>
            <tr valign="top">
              <td>Vedolizumab</td>
              <td>0.86 (0.76-0.93)</td>
              <td>0.94 (0.86-0.98)</td>
              <td>0.90 (0.81-0.96)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>The evaluation presented here is part of the observational, retrospective PREMONITION-CD study, designed to characterize clinical and nonclinical variables of patients with CD. To the best of our knowledge, this is the first multicentric study using a cNLP system for the identification of prespecified CD-related variables from reports written in Spanish. The intrinsic characteristics of IBD and the current dilemmas associated with the medical management of affected patients present an opportunity for the implementation of big data research strategies. Artificial intelligence techniques complement current research efforts and might be key in disentangling the complexity of the disease [<xref ref-type="bibr" rid="ref23">23</xref>] by allowing key patient-centered information to be retrieved and analyzed at a larger population scale. In turn, large CD/IBD data sets will enable the identification of clinical patterns, patient management, and predictors of disease that will ultimately improve patient care.</p>
      <p>Although some clinical data are stored in structured fields of EHRs, the majority are contained in the narrative free text [<xref ref-type="bibr" rid="ref4">4</xref>]. The automated extraction of these data using modern NLP techniques has a strikingly positive impact on clinical practice, since it enables the exploration of this valuable patient information at a scale that was not possible before. Here, we evaluated Savana’s EHRead technology, a cNLP system designed to detect and extract clinically relevant information from the free text of EHRs [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref15">15</xref>], to identify CD reports from narrative clinical data.</p>
      <p>In contrast to other research studies that applied NLP techniques on Spanish EHRs obtained from a single medical center [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>], this study combined data from eight large hospitals, thereby providing robustness and enabling generalizability. The capabilities of the EHRead technology allowed us to process a wide range of document types and to handle the different internal structures of clinical reports from the different participating sites. In addition, the inclusion of different sites enhanced the variability and richness of the language regarding the evaluated variables. Indeed, the variables evaluated were expressed in different ways across sites, including discrepancies in abbreviations or acronyms.</p>
      <p><italic>F1 scores</italic> higher than 0.80 for all interannotator agreements ensure that the gold standard met the criteria to serve as a reference. In addition, our study demonstrates a good performance of the EHRead technology in identifying reports that contain mentions of CD and CD-related variables. We obtained <italic>F1 scores</italic> higher than 90% for the main variable and close to 80% for the remaining variables (<xref ref-type="table" rid="table2">Table 2</xref>). Despite the intrinsic heterogeneity of EHRs resulting from variability in physicians, data collection sites, and record completeness, EHRead was successful at pinpointing important information, as reflected by these assessment parameters. Indeed, <italic>precision</italic> and <italic>recall</italic> were balanced for most of the variables, showing that the EHRead technology is not only accurate when detecting the evaluated variables but also capable of retrieving a large amount of information.</p>
      <p>Although this study deals with EHRs in Spanish, most previous cNLP systems focused on information extraction from clinical reports in English [<xref ref-type="bibr" rid="ref26">26</xref>]. <italic>F1 scores</italic> of cNLP systems that target EHRs in English range from 0.71 to 0.92 [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. Available rule-based [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref31">31</xref>] or machine learning–oriented [<xref ref-type="bibr" rid="ref25">25</xref>] systems that identify medical entities in Spanish have reached <italic>F1 scores</italic> between 0.70 and 0.90. However, the cNLP systems targeting the Spanish language are still limited. A direct comparison between the EHRead technology and these state-of-the-art approaches is complicated due to differences in gold standard creation and use of language. Nevertheless, the overall performance of the EHRead technology across the eight participating sites with the achieved <italic>F1 scores</italic> demonstrates that the performance is comparable to other state-of-the-art NLP systems available in the clinical domain. Furthermore, compared to previous works that detect CD-related variables in English using NLP to increase or correctly classify the number of patients with CD detected through the standard International Classification of Diseases-9 coding system [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], our study relies on a purely NLP-dependent detection approach. Having performed our study in Spanish is an added value, since it is a language in which NLP has not been previously applied in CD studies, nonetheless yielding robust results compared to previous approaches in English.</p>
      <p>A robust linguistic validation of the EHRead technology sets it forth as a valuable methodology for future studies regarding IBD and CD. The expanding use of EHRs and the wealth of information contained within their free text represent a unique source of data that benefits from the development of cNLP systems. Indeed, cNLP systems are dynamic and evolve with novel technologies that improve concept identification [<xref ref-type="bibr" rid="ref21">21</xref>]. This approach is suitable to better detect clinical information of patients with IBD and CD in a real-world setting, which can provide insight to improve the medical management of these patients.</p>
      <p>In conclusion, this study presents an evaluation of the EHRead technology, an NLP system for the extraction of clinical information from the narrative free text contained in EHRs. This evaluation clearly demonstrates the ability of the EHRead technology to identify mentions of CD and two related variables. Although further research is needed, the use of the EHRead technology facilitates the automated large-scale analysis of CD, thus contributing to the improvement of clinical practice by generating real-world evidence. Robust data extraction and precise variable detection are key to support future studies using large data sets of patients with CD.</p>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CD</term>
          <def>
            <p>Crohn disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">cNLP</term>
          <def>
            <p>clinical natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">GDPR</term>
          <def>
            <p>General Data Protection Regulation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">IBD</term>
          <def>
            <p>inflammatory bowel disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SNOMED-CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine–Clinical Terms</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to thank Tamara Pozo, Marta Mengual, and Ana Sánchez Gabriel for their kind support during the study, and Stephanie Marchesseau for valuable comments on a previous version of this manuscript. We are grateful to Laura Yebes, Carlos Del Rio-Bermudez, Ana Lopez-Ballesteros, and Clara L Oeste for their assistance in writing and editing the manuscript, and the construction of figures funded by Takeda.</p>
      <p>The PREMONITION-CD Study Group includes the following investigators: Carlos Castaño from Hospital Universitario (HU) Rey Juan Carlos, Madrid, Spain; Ángel Ponferrada Díaz from HU Infanta Leonor, Madrid, Spain; María Chaparro and María José Casanova from HU de La Princesa, Madrid, Spain; Felipe Ramos Zabala from HM Hospitales, Madrid, Spain; Almudena Calvache from HU Infanta Elena, Madrid, Spain; Fernando Bermejo from HU de Fuenlabrada, Madrid, Spain; Noemí Manceñido from HU Infanta Sofía, Madrid, Spain; and Marta Calvo Moya from HU Puerta de Hierro, Majadahonda, Madrid, Spain.</p>
      <p>This study was funded by Takeda Farmacéutica España S.A. The analyses conducted by Medsavana SL as well as the participation of the Medsavana authors in the development of this manuscript were funded by Takeda Farmacéutica España S.A.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>All authors have made substantial contributions to the conception and design of the study, and acquisition, analysis, and interpretation of data, in addition to drafting and revising the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>JPG has served as a speaker, a consultant, and an advisory member for, or has received research funding from, MSD, Abbvie, Hospira, Pfizer, Kern Pharma, Biogen, Takeda, Janssen, Roche, Sandoz, Celgene, Ferring, Faes Farma, Shire Pharmaceuticals, Dr. Falk Pharma, Tillotts Pharma, Chiesi, Casen Fleet, Gebro Pharma, Otsuka Pharmaceutical, and Vifor Pharma. IG has served as a speaker, a consultant, and an advisory member for, or has received research funding from, Kern Pharma, Takeda, and Janssen. RP has served as a speaker for Takeda and Janssen. MIVM has served as a speaker, consultant, and advisory member for, or has received funding from, MSD, Abbvie, Pfizer, Ferring, Shire Pharmaceuticals, Takeda, and Janssen. FG has received educational grants from Janssen, MSD, Takeda, and Abbvie, and nonpersonal investigation grants from MSD, Janssen, Abbvie, Takeda, and Tillotts. CM, JA, VM, IT, and AFN are employees at Takeda Farmacéutica España S.A. LC is an ex-employee and SM is currently employed at Medsavana SL, which received funding from Takeda Farmacéutica España S.A. The remaining authors have no conflicts of interest to declare.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Natural history and long-term clinical course of Crohn's disease</article-title>
          <source>World J Gastroenterol</source>
          <year>2014</year>
          <month>01</month>
          <day>07</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.wjgnet.com/1007-9327/full/v20/i1/31.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.3748/wjg.v20.i1.31</pub-id>
          <pub-id pub-id-type="medline">24415855</pub-id>
          <pub-id pub-id-type="pmcid">PMC3886024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ananthakrishnan</surname>
              <given-names>AN</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology and risk factors for IBD</article-title>
          <source>Nat Rev Gastroenterol Hepatol</source>
          <year>2015</year>
          <month>04</month>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>205</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1038/nrgastro.2015.34</pub-id>
          <pub-id pub-id-type="medline">25732745</pub-id>
          <pub-id pub-id-type="pii">nrgastro.2015.34</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramadas</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Gunesh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>GAO</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>GT</given-names>
            </name>
            <name name-style="western">
              <surname>Hawthorne</surname>
              <given-names>AB</given-names>
            </name>
          </person-group>
          <article-title>Natural history of Crohn's disease in a population-based cohort from Cardiff (1986-2003): a study of changes in medical treatment and surgical resection rates</article-title>
          <source>Gut</source>
          <year>2010</year>
          <month>09</month>
          <volume>59</volume>
          <issue>9</issue>
          <fpage>1200</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1136/gut.2009.202101</pub-id>
          <pub-id pub-id-type="medline">20650924</pub-id>
          <pub-id pub-id-type="pii">gut.2009.202101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Del Rio-Bermudez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Medrano</surname>
              <given-names>IH</given-names>
            </name>
            <name name-style="western">
              <surname>Yebes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Poveda</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Towards a symbiotic relationship between big data, artificial intelligence, and hospital pharmacy</article-title>
          <source>J Pharm Policy Pract</source>
          <year>2020</year>
          <month>11</month>
          <day>09</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>75</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://joppp.biomedcentral.com/articles/10.1186/s40545-020-00276-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s40545-020-00276-6</pub-id>
          <pub-id pub-id-type="medline">33292570</pub-id>
          <pub-id pub-id-type="pii">10.1186/s40545-020-00276-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7650184</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Language, structure, and reuse in the electronic health record</article-title>
          <source>AMA J Ethics</source>
          <year>2017</year>
          <month>03</month>
          <day>01</day>
          <volume>19</volume>
          <issue>3</issue>
          <fpage>281</fpage>
          <lpage>288</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journalofethics.ama-assn.org/article/language-structure-and-reuse-electronic-health-record/2017-03"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/journalofethics.2017.19.3.stas1-1703</pub-id>
          <pub-id pub-id-type="medline">28323609</pub-id>
          <pub-id pub-id-type="pii">journalofethics.2017.19.3.stas1-1703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sager</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lyman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bucknall</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nhan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tick</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing and the representation of clinical data</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>1994</year>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>142</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/7719796"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.1994.95236145</pub-id>
          <pub-id pub-id-type="medline">7719796</pub-id>
          <pub-id pub-id-type="pmcid">PMC116193</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siddharthan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Christopher D. Manning and Hinrich Schutze. Foundations of Statistical Natural Language Processing. MIT Press, 2000. ISBN 0-262-13360-1. 620 pp. $64.95/£44.95 (cloth)</article-title>
          <source>Nat Lang Eng</source>
          <year>2002</year>
          <month>06</month>
          <day>17</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>91</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.1017/S1351324902212851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>HE</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cassell</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Extracting information from the text of electronic medical records to improve case detection: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <month>09</month>
          <volume>23</volume>
          <issue>5</issue>
          <fpage>1007</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26911811"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv180</pub-id>
          <pub-id pub-id-type="medline">26911811</pub-id>
          <pub-id pub-id-type="pii">ocv180</pub-id>
          <pub-id pub-id-type="pmcid">PMC4997034</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ngo</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson-Browne</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Feller</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>McElhinney</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Culver</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Alfreds</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Stearns</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sylvester</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Widen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>XB</given-names>
            </name>
          </person-group>
          <article-title>Web-based real-time case finding for the population health management of patients with diabetes mellitus: a prospective validation of the natural language processing-based algorithm with statewide electronic medical records</article-title>
          <source>JMIR Med Inform</source>
          <year>2016</year>
          <month>11</month>
          <day>11</day>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e37</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2016/4/e37/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.6328</pub-id>
          <pub-id pub-id-type="medline">27836816</pub-id>
          <pub-id pub-id-type="pii">v4i4e37</pub-id>
          <pub-id pub-id-type="pmcid">PMC5124114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mallipeddi</surname>
              <given-names>VP</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhry</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Arruda-Olson</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes for identification of critical limb ischemia</article-title>
          <source>Int J Med Inform</source>
          <year>2018</year>
          <month>03</month>
          <volume>111</volume>
          <fpage>83</fpage>
          <lpage>89</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(17)30475-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2017.12.024</pub-id>
          <pub-id pub-id-type="medline">29425639</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(17)30475-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC5808583</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Espinosa-Anke</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tello</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pardo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Medrano</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ureña</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Salcedo</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Saggion</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Savana: a global information extraction and terminology expansion framework in the medical domain</article-title>
          <source>Procesamiento Lenguaje Nat</source>
          <year>2016</year>
          <volume>57</volume>
          <fpage>23</fpage>
          <lpage>30</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hernandez Medrano</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Tello Guijarro</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Belda</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ureña</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Salcedo</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Espinosa-Anke</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Saggion</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Savana: re-using electronic health records with artificial intelligence</article-title>
          <source>Int J Interactive Multimedia Artif Intelligence</source>
          <year>2018</year>
          <volume>4</volume>
          <issue>7</issue>
          <fpage>8</fpage>
          <pub-id pub-id-type="doi">10.9781/ijimai.2017.03.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Graziani</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Soriano</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Del Rio-Bermudez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Morena</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Díaz</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Castillo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alonso</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ancochea</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lumbreras</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Izquierdo</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Characteristics and prognosis of COVID-19 in patients with COPD</article-title>
          <source>J Clin Med</source>
          <year>2020</year>
          <month>10</month>
          <day>12</day>
          <volume>9</volume>
          <issue>10</issue>
          <fpage>3259</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jcm9103259"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jcm9103259</pub-id>
          <pub-id pub-id-type="medline">33053774</pub-id>
          <pub-id pub-id-type="pii">jcm9103259</pub-id>
          <pub-id pub-id-type="pmcid">PMC7600734</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Izquierdo</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Ancochea</surname>
              <given-names>J</given-names>
            </name>
            <collab>Savana COVID-19 Research Group</collab>
            <name name-style="western">
              <surname>Soriano</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Clinical characteristics and prognostic factors for intensive care unit admission of patients with COVID-19: retrospective study using machine learning and natural language processing</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>28</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e21801</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e21801/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21801</pub-id>
          <pub-id pub-id-type="medline">33090964</pub-id>
          <pub-id pub-id-type="pii">v22i10e21801</pub-id>
          <pub-id pub-id-type="pmcid">PMC7595750</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Izquierdo</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Almonacid</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>González</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Del Rio-Bermudez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ancochea</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cárdenas</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lumbreras</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Soriano</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>The impact of COVID-19 on patients with asthma</article-title>
          <source>Eur Respir J</source>
          <year>2021</year>
          <month>03</month>
          <volume>57</volume>
          <issue>3</issue>
          <fpage>2003142</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://erj.ersjournals.com/lookup/pmidlookup?view=long&amp;pmid=33154029"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/13993003.03142-2020</pub-id>
          <pub-id pub-id-type="medline">33154029</pub-id>
          <pub-id pub-id-type="pii">13993003.03142-2020</pub-id>
          <pub-id pub-id-type="pmcid">PMC7651839</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <month>05</month>
          <day>02</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
          <pub-id pub-id-type="medline">22549152</pub-id>
          <pub-id pub-id-type="pii">nrg3208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rindflesch</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Corn</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing: state of the art and prospects for significant progress, a workshop sponsored by the National Library of Medicine</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>10</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>765</fpage>
          <lpage>773</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00079-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.06.004</pub-id>
          <pub-id pub-id-type="medline">23810857</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(13)00079-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Machine Learning Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McHugh</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Interrater reliability: the kappa statistic</article-title>
          <source>Biochem Med (Zagreb)</source>
          <year>2012</year>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>276</fpage>
          <lpage>282</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.biochemia-medica.com/2012/22/276"/>
          </comment>
          <pub-id pub-id-type="medline">23092060</pub-id>
          <pub-id pub-id-type="pmcid">PMC3900052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Osen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Choo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Perry</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Abantanga</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>McCord</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chrouser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Abdullah</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Validation of the World Health Organization tool for situational analysis to assess emergency and essential surgical care at district hospitals in Ghana</article-title>
          <source>World J Surg</source>
          <year>2011</year>
          <month>03</month>
          <volume>35</volume>
          <issue>3</issue>
          <fpage>500</fpage>
          <lpage>504</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21190114"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00268-010-0918-1</pub-id>
          <pub-id pub-id-type="medline">21190114</pub-id>
          <pub-id pub-id-type="pmcid">PMC3032911</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Canales</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Menke</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marchesseau</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>D'Agostino</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Del Rio-Bermudez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Taberna</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tello</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Assessing the performance of clinical natural language processing systems: development of an evaluation methodology</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>07</month>
          <day>23</day>
          <volume>9</volume>
          <issue>7</issue>
          <fpage>e20492</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/7/e20492/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20492</pub-id>
          <pub-id pub-id-type="medline">34297002</pub-id>
          <pub-id pub-id-type="pii">v9i7e20492</pub-id>
          <pub-id pub-id-type="pmcid">PMC8367121</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Landis</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Koch</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>The measurement of observer agreement for categorical data</article-title>
          <source>Biometrics</source>
          <year>1977</year>
          <month>03</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>159</fpage>
          <lpage>174</lpage>
          <pub-id pub-id-type="medline">843571</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olivera</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Danese</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jay</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Natoli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Peyrin-Biroulet</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Big data in IBD: a look into the future</article-title>
          <source>Nat Rev Gastroenterol Hepatol</source>
          <year>2019</year>
          <month>05</month>
          <volume>16</volume>
          <issue>5</issue>
          <fpage>312</fpage>
          <lpage>321</lpage>
          <pub-id pub-id-type="doi">10.1038/s41575-019-0102-5</pub-id>
          <pub-id pub-id-type="medline">30659247</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41575-019-0102-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oronoz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gojenola</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pérez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>de Ilarraza</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Casillas</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>On the creation of a clinical gold standard corpus in Spanish: mining adverse drug reactions</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>08</month>
          <volume>56</volume>
          <fpage>318</fpage>
          <lpage>332</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00126-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.06.016</pub-id>
          <pub-id pub-id-type="medline">26141794</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00126-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pérez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weegar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Casillas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gojenola</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Oronoz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dalianis</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised medical entity recognition: a study on Spanish and Swedish clinical corpora</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>07</month>
          <volume>71</volume>
          <fpage>16</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30104-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.05.009</pub-id>
          <pub-id pub-id-type="medline">28526460</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30104-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mowery</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Kvist</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dalianis</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Recent advances in clinical natural language processing in support of semantic analysis</article-title>
          <source>Yearb Med Inform</source>
          <year>2015</year>
          <month>08</month>
          <day>13</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>183</fpage>
          <lpage>193</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.15265/IY-2015-009"/>
          </comment>
          <pub-id pub-id-type="doi">10.15265/IY-2015-009</pub-id>
          <pub-id pub-id-type="medline">26293867</pub-id>
          <pub-id pub-id-type="pii">me2015-009</pub-id>
          <pub-id pub-id-type="pmcid">PMC4587060</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ogren</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>507</fpage>
          <lpage>513</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20819853"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
          <pub-id pub-id-type="medline">20819853</pub-id>
          <pub-id pub-id-type="pii">17/5/507</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jonnalagadda</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Adupa</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Garg</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Corona-Cox</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Text mining of the electronic health record: an information extraction approach for automated identification and subphenotyping of HFpEF patients for clinical trials</article-title>
          <source>J Cardiovasc Transl Res</source>
          <year>2017</year>
          <month>06</month>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>313</fpage>
          <lpage>321</lpage>
          <pub-id pub-id-type="doi">10.1007/s12265-017-9752-2</pub-id>
          <pub-id pub-id-type="medline">28585184</pub-id>
          <pub-id pub-id-type="pii">10.1007/s12265-017-9752-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Entity recognition from clinical texts via recurrent neural network</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>07</month>
          <day>05</day>
          <volume>17</volume>
          <issue>Suppl 2</issue>
          <fpage>67</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0468-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0468-7</pub-id>
          <pub-id pub-id-type="medline">28699566</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0468-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5506598</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soysal</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pakhomov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>CLAMP - a toolkit for efficiently building customized clinical natural language processing pipelines</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>03</month>
          <day>01</day>
          <volume>25</volume>
          <issue>3</issue>
          <fpage>331</fpage>
          <lpage>336</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29186491"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx132</pub-id>
          <pub-id pub-id-type="medline">29186491</pub-id>
          <pub-id pub-id-type="pii">4657212</pub-id>
          <pub-id pub-id-type="pmcid">PMC7378877</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moreno</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Moreda</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Romá-Ferri</surname>
              <given-names>MT</given-names>
            </name>
          </person-group>
          <article-title>MaNER: A MedicAl Named Entity Recogniser</article-title>
          <year>2015</year>
          <conf-name>Natural Language Processing and Information Systems: 20th International Conference on Applications of Natural Language to Information Systems</conf-name>
          <conf-date>June 17-19, 2015</conf-date>
          <conf-loc>Passau, Germany</conf-loc>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>418</fpage>
          <lpage>423</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-19581-0_40</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ananthakrishnan</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>VS</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Plenge</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>KP</given-names>
            </name>
          </person-group>
          <article-title>Improving case definition of Crohn's disease and ulcerative colitis in electronic medical records using natural language processing: a novel informatics approach</article-title>
          <source>Inflamm Bowel Dis</source>
          <year>2013</year>
          <month>06</month>
          <volume>19</volume>
          <issue>7</issue>
          <fpage>1411</fpage>
          <lpage>1420</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23567779"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MIB.0b013e31828133fd</pub-id>
          <pub-id pub-id-type="medline">23567779</pub-id>
          <pub-id pub-id-type="pmcid">PMC3665760</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kurowski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Milinovich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Bauman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sugano</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kattan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Achkar</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>Differences in biologic utilization and surgery rates in pediatric and adult Crohn's disease: results from a large electronic medical record-derived cohort</article-title>
          <source>Inflamm Bowel Dis</source>
          <year>2021</year>
          <month>06</month>
          <day>15</day>
          <volume>27</volume>
          <issue>7</issue>
          <fpage>1035</fpage>
          <lpage>1044</lpage>
          <pub-id pub-id-type="doi">10.1093/ibd/izaa239</pub-id>
          <pub-id pub-id-type="medline">32914165</pub-id>
          <pub-id pub-id-type="pii">5903962</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
