<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i12e28632</article-id>
      <article-id pub-id-type="pmid">34951601</article-id>
      <article-id pub-id-type="doi">10.2196/28632</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Text Mining of Adverse Events in Clinical Trials: Deep Learning Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Alex</surname>
            <given-names>Beatrice</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Burns</surname>
            <given-names>Michael</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shams</surname>
            <given-names>Shayan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Chopard</surname>
            <given-names>Daphne</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7964-1681</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Treder</surname>
            <given-names>Matthias S</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5955-2326</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Corcoran</surname>
            <given-names>Padraig</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9731-3385</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Ahmed</surname>
            <given-names>Nagheen</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0647-0244</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Johnson</surname>
            <given-names>Claire</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6190-3022</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Busse</surname>
            <given-names>Monica</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5331-5909</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Spasic</surname>
            <given-names>Irena</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>School of Computer Science &#38; Informatics</institution>
            <institution>Cardiff University</institution>
            <addr-line>Abacws</addr-line>
            <addr-line>Senghennydd Road</addr-line>
            <addr-line>Cardiff, CF24 4AG</addr-line>
            <country>United Kingdom</country>
            <phone>44 2920870032</phone>
            <email>spasici@cardiff.ac.uk</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8132-3885</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Computer Science &#38; Informatics</institution>
        <institution>Cardiff University</institution>
        <addr-line>Cardiff</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Centre for Trials Research</institution>
        <institution>Cardiff University</institution>
        <addr-line>Cardiff</addr-line>
        <country>United Kingdom</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Irena Spasic <email>spasici@cardiff.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>12</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>24</day>
        <month>12</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>12</issue>
      <elocation-id>e28632</elocation-id>
      <history>
        <date date-type="received">
          <day>9</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>7</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>1</day>
          <month>8</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>14</day>
          <month>11</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Daphne Chopard, Matthias S Treder, Padraig Corcoran, Nagheen Ahmed, Claire Johnson, Monica Busse, Irena Spasic. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 24.12.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/12/e28632" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Pharmacovigilance and safety reporting, which involve processes for monitoring the use of medicines in clinical trials, play a critical role in the identification of previously unrecognized adverse events or changes in the patterns of adverse events.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to demonstrate the feasibility of automating the coding of adverse events described in the narrative section of the serious adverse event report forms to enable statistical analysis of the aforementioned patterns.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used the Unified Medical Language System (UMLS) as the coding scheme, which integrates 217 source vocabularies, thus enabling coding against other relevant terminologies such as the International Classification of Diseases–10th Revision, Medical Dictionary for Regulatory Activities, and Systematized Nomenclature of Medicine. We used MetaMap, a highly configurable dictionary lookup software, to identify the mentions of the UMLS concepts. We trained a binary classifier using Bidirectional Encoder Representations from Transformers (BERT), a transformer-based language model that captures contextual relationships, to differentiate between mentions of the UMLS concepts that represented adverse events and those that did not.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The model achieved a high F1 score of 0.8080, despite the class imbalance. This is 10.15 percentage points lower than human-like performance but also 17.45 percentage points higher than that of the baseline approach.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>These results confirmed that automated coding of adverse events described in the narrative section of serious adverse event reports is feasible. Once coded, adverse events can be statistically analyzed so that any correlations with the trialed medicines can be estimated in a timely fashion.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>deep learning</kwd>
        <kwd>machine learning</kwd>
        <kwd>classification</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Modern health care is associated with increased costs and broad-reaching variations in care and outcomes across the global population. The provision of evidence-based health care is a critical priority for users, providers, and policy makers alike. The systematic and high-quality conduct of clinical trials is critical for the development of clinical guidance to inform evidence-based practice. Pharmacovigilance and safety reporting are among the most important aspects of the conduct of clinical trials. This is relevant to all clinical trials in which the benefit or harm must be fully established before any intervention or medicinal product is adopted.</p>
        <p>Pharmacovigilance and safety reporting provide the basis for ensuring clinical trial participant safety and good research practice. It involves processes for monitoring the use of medicines or interventions in clinical trials. It has a critical role in the identification of previously unrecognized adverse events or changes in the patterns of adverse events. It is also relevant to the assessment of the risks and benefits of medicines or interventions to determine what action, if any, is needed to improve their safe use.</p>
        <p>An adverse event is any untoward medical occurrence in a participant to whom a medicinal product has been administered, including occurrences that are not necessarily caused by or related to the administered product. A serious adverse event (SAE) is any untoward medical occurrence that, at any dose, results in death, is life-threatening, requires inpatient hospitalization or causes prolongation of existing hospitalization, results in persistent or significant disability or incapacity, or comprises a congenital anomaly or birth defect. Early detection of unknown adverse events, reactions, interactions, and an increase in the frequency of (known) adverse events is a key element of the pharmacovigilance and safety process. Provision of up-to-date information on adverse events to health care professionals, researchers, and regulatory bodies contributes to the assessment of benefit, harm, effectiveness, and risk of the intervention, thus advancing their safe, rational, and more effective (including cost-effective) use.</p>
        <p>In multicenter noncommercial clinical trials conducted in the United Kingdom, the SAE reporting requirements are detailed in the trial protocol, and the principal investigators at National Health Service sites are responsible for reporting SAEs to the coordinating clinical trial unit (CTU) for an assessment of the seriousness, causality, and expectedness as delegated by the clinical trial sponsor. An SAE report includes an event term and additional signs and symptoms in a narrative. The narrative is reported by a physician during their medical assessment of the event. The report is then reviewed by a central CTU reviewer to assess any potential causal relationship with the trial drug. Each narrative is reviewed as a single report. The narratives are typically received from sites as paper records. These are logged electronically in the safety databases by the CTU pharmacovigilance team for the relevant national competent authorities (eg, the UK Medicines and Health Care Products Regulatory Agency or European Medicines Agency). The reports are searchable on request and subject to appropriate regulatory permissions. There is now a clear recognition of the potential for artificial intelligence in safety case management to identify relationships and signals [<xref ref-type="bibr" rid="ref1">1</xref>]. Although these approaches may be implemented in commercial settings and within competent authorities, such methods for classifying and categorizing data are not yet standardized or explicit across noncommercial pharmacovigilance settings.</p>
        <p>It is possible that the narrative contains additional adverse events or toxicities that are not coded as additional events and are captured in the narrative only. However, there is no mechanism for the detection of safety signals across individual reports or individual trials and, thus, there is no possibility for early detection of worrying trends. This is particularly the case for toxicities for which reconciliation with the clinical database would be advantageous. Such a tool would facilitate the cross-checking of toxicities recorded in the narrative of the SAE form with those recorded in the trial database, which is currently only feasible if automated. Although these approaches may be used in commercial trial settings, they would not always be used in the public domain simply because of the nature of the drug licensing pathway.</p>
        <p>This study seeks to use text mining to automatically identify and code adverse events from the narrative sections of SAE reports in clinical trials of investigational medicinal products coordinated by a noncommercial CTU, with the aim of unlocking narrative evidence for further statistical analysis. Although such an analysis is beyond the scope of this study, it would serve to monitor the patterns of adverse events at the cohort level rather than singular adverse events. Owing to their narrative nature, such an analysis cannot be conducted directly on the content of SAE reports.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>Text mining has been used to identify adverse events from a variety of data sources, including spontaneous reporting systems, medical literature, electronic health records, and user-generated content on the internet [<xref ref-type="bibr" rid="ref2">2</xref>]. The problem of mining adverse events in text has been approached from different angles. Most commonly, it has been defined as a text classification problem, where a piece of text, either an entire document or its part (eg, an individual sentence), is mapped to ≥1 predefined classes that correspond to a type of adverse event or its property. Some approaches target a specific adverse event such as anaphylaxis and perform simple binary classification with respect to the presence of the event considered [<xref ref-type="bibr" rid="ref3">3</xref>]. Other examples target a range of drugs and use documents that mention them to train a binary classifier with respect to their safety, using an existing watch list of drugs that have an active safety alert posted on the US Food and Drug Administration website [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>In terms of semantics, adverse events are compatible with signs and symptoms. When a dictionary-based method is used to extract such instances, a binary classifier is needed to differentiate between the signs and symptoms that correspond to adverse events and those associated with the underlying diagnosis [<xref ref-type="bibr" rid="ref5">5</xref>]. Along similar lines, when an adverse event is associated with medication, a system is needed to support safety evaluators in identifying reports that may demonstrate causal relationships with the suspect medications. To this end, it has been shown that a binary classifier can be trained to successfully differentiate between 2 causality categories: certain, probable, or possible versus unlikely or unassessable [<xref ref-type="bibr" rid="ref6">6</xref>]. Multifaceted classification can be performed to identify additional properties of an adverse event, for example, temporal (historical or present), categorical (assertive, hypothetical, retrospective, or a general discussion), and contextual (deduced or explicitly stated) [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>Alternatively, the problem of identifying adverse events can be defined as that of information extraction [<xref ref-type="bibr" rid="ref8">8</xref>]. More specifically, we can differentiate between entity and relationship extraction. Here, the goal of entity extraction is to identify a text sequence that describes an adverse event. Therefore, it can also be viewed as a sequence labeling problem [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. In addition, the text sequence can be mapped to a relevant dictionary such as the Medical Dictionary for Regulatory Activities [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>] or the Unified Medical Language System (UMLS) [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Such normalization of named entities to standardized identifiers is especially relevant when processing text originating from social media, whose language tends to be highly colloquial [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>When multiple medicines are considered, 2 types of named entities need to be extracted—medicines and adverse events—and additional reasoning needs to be performed to extract a relationship between the two [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Further statistical analysis can be applied to such pairs to measure the strength of such associations [<xref ref-type="bibr" rid="ref18">18</xref>]. Information of interest can be extracted using pattern-matching approaches, where patterns are typically modeled using regular expressions [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Alternatively, frequent patterns of language for expressing opinions about medications can be learned automatically using association rule mining by considering sentences as transactions and the words in a sentence as items in the transactions [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>Specific methods chosen to mine adverse events from text depend on the way the text mining problem is posed. Typical approaches chosen for text classification include rule-based methods [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref20">20</xref>] and supervised machine learning [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. A range of machine learning methods has been used, including naive Bayes, support vector machines, random forests, maximum entropy, and logistic regression. On occasion, ensemble learning has been used to improve classification performance by integrating multiple models using methods such as bagging, majority voting, weighted averaging, and stacked generalization [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. The different types of lexical, syntactic, and semantic features have been used by the classification algorithms. Lexical features include n-grams [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref16">16</xref>], context windows [<xref ref-type="bibr" rid="ref17">17</xref>], and lexicon matches [<xref ref-type="bibr" rid="ref16">16</xref>]. Typically, syntactic features include part-of-speech tags, negation, syntactic dependencies, and syntactic functions [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. 
Semantic features are either based on external sources such as the UMLS, PubChem, or DrugBank [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref22">22</xref>] or manually engineered [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. Other used features were based on sentiment polarities [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref16">16</xref>] and topic modeling [<xref ref-type="bibr" rid="ref16">16</xref>]. A few examples of using feature selection methods include binormal separation [<xref ref-type="bibr" rid="ref4">4</xref>] and information gain [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>Finally, approaches chosen to address adverse event mining as a sequence labeling problem include conditional random fields (CRFs) [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref23">23</xref>] and, more recently, neural networks (NNs) [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], including recurrent NNs [<xref ref-type="bibr" rid="ref10">10</xref>] and long short-term memory (LSTM) [<xref ref-type="bibr" rid="ref24">24</xref>], which outperformed CRFs. For best results, bidirectional LSTM is combined with CRF [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. Most approaches used word embeddings, which represent words as meaningful real-valued vectors of configurable dimensions learned automatically from a large corpus based on their co-occurrence using methods such as word2vec [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>], fastText [<xref ref-type="bibr" rid="ref24">24</xref>], and GloVe [<xref ref-type="bibr" rid="ref30">30</xref>]. Traditional bag-of-words (BOW) approaches tend to struggle with unseen or rare words. Word embeddings that are pretrained on a large corpus remedy this problem and, consequently, boost recall (R).</p>
        <p>The aforementioned word-embedding models generate a single embedding for each word, thus conflating homonyms in the corresponding vector space. Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref31">31</xref>] captures contextual relationships in a bidirectional way to contextualize the embedding of any given word based on the surrounding words. BERT is based on an encoder–decoder NN architecture, which can not only be used to generate word embeddings but can also be fine-tuned and further trained for various text mining tasks. For example, it has been used to model adverse event extraction as a named entity recognition (NER) task [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. The topics of word embedding and BERT, in particular, will be revisited later in this paper in the context of motivating and describing our own approach to this problem.</p>
        <p>The after-the-fact nature of text data collected from sources such as spontaneous reporting systems, medical literature, electronic health records, and social media naturally gives rise to postmarketing surveillance applications [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. However, pharmacovigilance starts by collecting safety information derived from randomized controlled trials. Our review of text mining applications related to the identification of adverse events revealed that this source of data was underrepresented. This study addresses this gap by using SAE report forms collected during clinical trials as the primary source of data. Given that each trial focuses on a specific medicinal product, the problem is somewhat simplified as the need to extract information about the product itself is obviated. This also makes it more natural to define it as a multi-label text classification problem rather than an information extraction problem. Using the UMLS as our classification scheme, the main aim is to map each document to a set of coded adverse events. The main difficulty of the problem lies in differentiating between signs and symptoms associated with the underlying condition and those that represent adverse events. The fact that both types of references to signs and symptoms can be found within a single SAE report, often within the same sentence, renders a BOW approach unsuitable. Instead, we opt for a deep learning approach. Instead of LSTM approaches, which seem to dominate in our review of the related work, we opt for transformers, which tend to outperform recurrent NNs on a variety of natural language processing tasks.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Provenance</title>
        <p>Data were provided by the Centre for Trials Research (CTR), the largest group of academic (noncommercial) clinical trial staff in Wales. Their portfolio of work includes drug trials and complex interventions, mechanisms of disease and treatments, cohort studies, and informing policy and practice in partnerships with researchers across the United Kingdom and worldwide. Across all these trials, standard procedures are put in place to monitor and manage safety reporting and SAE in line with the regulatory requirements for research.</p>
        <p>Clinical trials SAE report forms (<xref rid="figure1" ref-type="fig">Figure 1</xref>) are completed by research nurses and physicians at hospital or clinical trial sites and submitted as PDF documents to the CTR central safety team for management and processing. They contain data on the SAE and a narrative description of the event. The narrative is used by the reviewer to help assess causal relationships with the trial drug but is not entered into the trial database and is not used in any analysis of the events. Completed SAE reports are then sent for review by a physician and, depending on the outcome of the review, are logged in the safety databases for the regulatory authorities, ethics committees, and drug companies.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>A serious adverse event (SAE) reporting form. CTCAE: Common Terminology Criteria for Adverse Events; N/A: Not Applicable.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Although narratives in noncommercial settings, such as CTR, can be digitized, this does not currently take place at the point of initial SAE reporting, as electronic data capture for the SAE report is associated with additional regulatory challenges, primarily because of the requirement for signature verification by a physician and a contemporaneous changelog. Clinical trial staff reviewing SAE reports are, thus, unable to systematically analyze the information provided in the narrative, missing an opportunity to identify the trends and potential safety signals. If the text mining approach were to identify additional safety events and signals not detected through standard reporting, processes could be altered to improve work practices at the level of a noncommercial CTU pharmacovigilance team.</p>
        <p>This study aims to assess the feasibility of text mining in the context of such an analysis. The findings could affect the way regulatory narratives are reviewed and analyzed, for example, noncompliances or audit findings.</p>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>Data were collected from 6 ongoing clinical trials, as described in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>Ethical review and approval were waived for this study as this study involved the use of secondary SAE data that were fully deidentified. All involved trials were conducted according to the guidelines of the Declaration of Helsinki and approved by the relevant research ethics committees. All chief investigators from these trials were consulted, and sponsor agreement was obtained for the use of the data in this secondary research study. Participant consent was also waived for the reasons stated above.</p>
        <p>A subset of SAE reports was sampled randomly from each trial, giving a total of 286 reports. Phases 1 and 2 were early phases with a smaller number of participants and were not powered. The fewer numbers of reported SAEs were a function of the smaller numbers of participants compared with phase 3; hence, there were variations in the number of documents across the 6 trials.</p>
        <p>The original SAE reports were pseudonymized at the point of extraction from the system by obscuring any links between the patient and their individual records. The narrative sections of the SAE reports were then transcribed and saved as Microsoft Word documents. The transcription process was extended to include deidentification by obscuring any personally identifiable information in a way that minimizes the risk of unintended disclosure of the identity of individuals and information about them. The transcribed documents were an average of 37 (SD 24) tokens long.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Clinical trials from which data were collected.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="810"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>ID</td>
                <td>Description</td>
                <td>Documents, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Trial-1</td>
                <td>A phase 2 study of neoadjuvant chemotherapy given before short-course preoperative radiotherapy as treatment for patients with MRI<sup>a</sup>-staged operable rectal cancer at high risk of metastatic relapse</td>
                <td>5</td>
              </tr>
              <tr valign="top">
                <td>Trial-2</td>
                <td>A phase 1b/2 randomized placebo-controlled trial in postmenopausal women with advanced breast cancer previously treated with drug A</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>Trial-3</td>
                <td>A randomized phase 3 clinical trial investigating the effect of drug B added to standard therapy in patients with lung cancer</td>
                <td>131</td>
              </tr>
              <tr valign="top">
                <td>Trial-4</td>
                <td>Study of chemoradiotherapy in esophageal cancer, plus or minus drug C</td>
                <td>34</td>
              </tr>
              <tr valign="top">
                <td>Trial-5</td>
                <td>A phase 1/2 single-arm trial to evaluate combination drugs for the treatment of advanced cancers, including first-line treatment of patients with advanced transitional cell carcinoma of the urothelium</td>
                <td>3</td>
              </tr>
              <tr valign="top">
                <td>Trial-6</td>
                <td>A randomized phase 3, open-label, multicenter, parallel group clinical trial to evaluate and compare the efficacy, safety profile, and tolerability of oral drug X versus intravenous drug Y in the treatment of patients with breast cancer and bone metastases</td>
                <td>106</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>MRI: magnetic resonance imaging.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Annotation</title>
        <p>The aim of this task was to annotate adverse events in the transcribed versions of the SAE report forms. For the purpose of this task, an adverse event was defined as any unfavorable or unintended disease, sign, or symptom (including an abnormal laboratory finding) that is temporally associated with the use of a medical treatment or procedure, which may or may not be considered related to the medical treatment or procedure. Such an event could be related to the intervention, dose, route of administration, or patient or caused by an interaction with another drug or procedure.</p>
        <p>The annotation guidelines prescribed the scope of the annotation task as follows: (1) focus only on adverse events that have occurred in the present or past, that is, ignore hypothetical or future events; (2) annotate the entire phrase that describes an adverse event; and (3) if the same adverse event were mentioned multiple times, then annotate every mention. The annotation process was based on the following instructions: (1) identify an adverse event that is mentioned in the narrative, (2) select the text that describes the adverse event, and (3) highlight the selected text.</p>
        <p>The text editing operations were performed using Microsoft Word, which was preferred over a specifically designed annotation tool such as BRAT or Bionotate [<xref ref-type="bibr" rid="ref34">34</xref>] because of zero installation and training overhead. Microsoft Word supports the bulk selection of text based on its formatting. This functionality was used to export highlighted text as standoff annotations, which were later used to calculate the interannotator agreement.</p>
        <p>A total of 2 annotators independently annotated all the documents. <xref rid="figure2" ref-type="fig">Figure 2</xref> provides an example. Here, both annotators annotated 2 mentions of tremor but did not annotate the historical mention of tremor as it was not temporally associated with the use of the medical treatment that was the subject of the given clinical trial. Further, 1 annotator failed to annotate vomiting, leading to disagreement, which was later resolved through discussion. To identify all such cases, we compared all annotations automatically and measured the interannotator agreement.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>A serious adverse event report annotated independently by 2 annotators. The annotations are highlighted in yellow.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The 2 annotators labeled SAEs as phrases, which were sequences of words whose total number, together with their start and end positions, were not prefixed. Comparing the interannotator agreement at the token level, as suggested by Tomanek et al [<xref ref-type="bibr" rid="ref35">35</xref>], was not entirely appropriate for 2 reasons. First, the annotators labeled phrases as sequences of tokens instead of labeling the tokens individually. Therefore, such an approach approximated the original annotation task. More importantly, the number of negative cases (ie, the tokens that had not been annotated) would inevitably be much larger than the number of positive cases, thus skewing the data. The lack of a well-defined number of negative cases prevented the use of traditional interannotator agreement measures such as Cohen κ statistic [<xref ref-type="bibr" rid="ref36">36</xref>]. A common way of quantifying interannotator agreement in such circumstances is to use information retrieval performance measures instead [<xref ref-type="bibr" rid="ref37">37</xref>]. By treating one annotator’s annotations as the gold standard and the other one’s as predictions, we calculated the numbers of true positives (TPs), false positives (FPs), and false negatives (FNs), as shown in the confusion matrix (<xref ref-type="table" rid="table2">Table 2</xref>). When these values were combined to calculate the F1 score, it no longer mattered which annotator was considered the gold standard as this measure was symmetrical.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Agreement between 2 annotators.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="490"/>
            <col width="400"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>Positive or negative</td>
                <td>Gold positive</td>
                <td>Gold negative</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Predicted positive</td>
                <td>TP<sup>a</sup>=744</td>
                <td>FP<sup>b</sup>=50</td>
              </tr>
              <tr valign="top">
                <td>Predicted negative</td>
                <td>FN<sup>c</sup>=98</td>
                <td>N/A<sup>d</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>TP: true positive.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>FP: false positive.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>FN: false negative.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>These values can then be used to calculate the precision (P), recall (R), and F1 score as follows (where FN denotes false negative):</p>
        <p>
          <disp-formula>P=TP/(TP+FP)=744/(744+50)=0.9370</disp-formula>
        </p>
        <p>
          <disp-formula>R=TP/(TP+FN)=744/(744+98)=0.8836</disp-formula>
        </p>
        <p>
          <disp-formula>F1=(2×P×R)/(P+R)=0.9095</disp-formula>
        </p>
        <p>An advantage of using information retrieval performance measures to estimate interannotator agreement is that their values can later be used to gauge a system against human-like performance. At F1=0.9095, the interannotator agreement was found to be relatively high. A total of 148 disagreements were resolved through discussions to establish the ground truth. As part of the discussions, the agreed annotations of adverse events were coded manually against the UMLS, which integrates multiple terminologies, classifications, and coding standards in an attempt to support the interoperability between biomedical information systems, including electronic health records [<xref ref-type="bibr" rid="ref38">38</xref>]. The MetaThesaurus Browser, a web-based search interface, was used to query the UMLS for each annotation to identify the corresponding concept (<xref rid="figure3" ref-type="fig">Figure 3</xref>). This searching procedure involved checking concept definitions to make sure that the chosen concept matched the sense of the adverse event annotation. Each concept in the UMLS is assigned a concept unique identifier (CUI), which was used to code the corresponding annotation (see <xref rid="figure4" ref-type="fig">Figure 4</xref> for examples). Subsequently, the CUI codes were extracted, duplicates were removed, and the remaining CUIs were used as class labels for each document. <xref ref-type="table" rid="table3">Table 3</xref> provides a statistical summary of the annotated data set, which contains a total of 995 class labels.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Metathesaurus browser search results.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Coding of documents against the Unified Medical Language System.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Statistical properties of the annotated data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="320"/>
            <col width="340"/>
            <col width="240"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Statistical properties</td>
                <td>Document length (in tokens)</td>
                <td>Annotations</td>
                <td>Class labels</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Values, minimum</td>
                <td>2</td>
                <td>1</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>Values, maximum</td>
                <td>223</td>
                <td>20</td>
                <td>19</td>
              </tr>
              <tr valign="top">
                <td>Values, median</td>
                <td>31</td>
                <td>3</td>
                <td>3</td>
              </tr>
              <tr valign="top">
                <td>Values, mean (SD)</td>
                <td>36.71 (23.77)</td>
                <td>3.76 (2.46)</td>
                <td>3.48 (2.18)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Problem Representation</title>
        <p>The aim of this study was to automate the identification of adverse events described in the narrative section of the SAE reports. This goal was cast as a text classification problem. Given a document and classification scheme, the system should label the document with the relevant classes from the given scheme. In our case, the document was an SAE report, a classification scheme was the set of concepts encompassed by the UMLS, and their CUIs were used as class labels. The second column in <xref rid="figure4" ref-type="fig">Figure 4</xref> provides an example of the expected output.</p>
        <p>To identify the possible adverse events mentioned in a document, the first step involved looking for concepts of the relevant semantic types. In our approach, the UMLS dictionary lookup was restricted to 6 manually selected semantic types: disease or syndrome, finding, injury or poisoning, neoplastic process, pathological function, and sign or symptom. Some of their mentions could be in the context of medical history and, therefore, not necessarily constitute an adverse event. To differentiate between the 2 types of mentions, we formulated a binary classification task at the concept level: given a context, does a specific UMLS concept constitute an adverse event? <xref rid="figure5" ref-type="fig">Figure 5</xref> provides different references to the concept of <italic>pleural effusion</italic>. For example, the first 3 references do not constitute adverse events. The first and third mentions of <italic>pleural effusion</italic> refer to medical history, whereas the second mention is negated. The remaining 3 mentions of <italic>pleural effusion</italic> refer to the cause of hospital admissions that prompted SAE reporting.</p>
        <p>The practical implementation of such problem representations started with linguistic preprocessing. The preprocessing module, which was originally developed to support cohort selection from hospital discharge summaries [<xref ref-type="bibr" rid="ref39">39</xref>], was adapted for this study. This module involved text segmentation and basic string operations such as lowercasing, fully expanding enclitics and special characters, replacing a selected subset of words and phrases with their representatives, and, in particular, replacing acronyms and abbreviations with their full forms. Finally, the preprocessed documents were analyzed using MetaMap [<xref ref-type="bibr" rid="ref40">40</xref>], a highly configurable dictionary lookup software, to find mentions of UMLS concepts from the 6 semantic types listed above. <xref rid="figure6" ref-type="fig">Figure 6</xref> illustrates a portion of the UMLS dictionary and how it was matched against the input text. As the figure illustrates, a single document might contain multiple adverse events. To support the classification of one adverse event candidate at a time, a separate copy of the given document was saved for each candidate. Each copy anchored a single concept, which may have had multiple occurrences, by marking them up in line. In addition, the text was further regularized by replacing all the concepts with their preferred names. Concept anchoring provided a simple, uniform representation of the potential adverse events, which enabled us to train a single binary classifier based on the context surrounding the anchors.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Adverse event identification as a binary classification task. CT: computed tomography.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Identification of potential adverse event mentions. CUI: concept unique identifier.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Classification Rationale</title>
        <p>The binary task formulation itself—<italic>given a context, does a specific UMLS concept constitute an adverse event?</italic>—indicates 2 main types of involved features: extrinsic (context) and intrinsic (concept). Extrinsic features may include the number of mentions within a document, the position within a document, and other words within a fixed-size window. When combined with gold standard annotations, machine learning can be used to discover how to differentiate between positive and negative contexts without having to manually describe the patterns of positive and negative use. For example, by considering the co-occurring words (see <xref rid="figure7" ref-type="fig">Figure 7</xref> for examples) and the corresponding annotations, a simple NN can learn to use words such as <italic>previous</italic> and <italic>have</italic> as negative and positive modifiers, respectively. By considering a wider context, more complex patterns such as <italic>admitted to hospital with</italic> and <italic>known to have</italic> (see <xref rid="figure8" ref-type="fig">Figure 8</xref> for examples) would start to emerge as positive and negative contexts, respectively. Traditionally, such patterns were observed using corpus linguistics methods, which were engineered manually and encoded formally as regular expressions [<xref ref-type="bibr" rid="ref41">41</xref>]. In recent times, NNs are used to automatically capture both short- and long-range dependencies.</p>
        <p>Similarly, lexical morphology could be explored in an NN approach to learn the patterns of subwords within a concept’s name, which were positively or negatively correlated with adverse events. For example, it is reasonable to expect that any concept identified as a potential adverse event that contains the word <italic>chronic</italic> (eg, <italic>chronic obstructive airway disease</italic> or <italic>chronic infection</italic>) is more likely to refer to a process than a single event. Similarly, any concept whose name contains a word <italic>loss</italic> (eg, <italic>loss of appetite</italic> or <italic>hair loss</italic>) is more likely to be an adverse event. The words themselves can be analyzed for affixes. For example, the prefix <italic>hypo-</italic> (low or below normal) can be used to increase the likelihood of concepts such as <italic>hypocalcemia</italic> or <italic>orthostatic hypotension</italic> corresponding to adverse events. Similarly, the suffix <italic>-emia</italic> (presence in the blood) can be used to identify concepts such as <italic>cerebrovascular ischemia</italic> or <italic>hyperkalemia</italic> as strong candidates for adverse events. Again, no prior medical knowledge is required to embed such features into NNs, which consider inputs and outputs simultaneously to support end-to-end learning and, hence, bypass manual feature engineering.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Observing the patterns of positive and negative modifiers. CRTI: common respiratory tract infection; GI: gastrointestinal; OGD: oesophagogastroduodenoscopy; PR: per rectum; SAE: serious adverse event.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Observing more complex patterns of positive and negative use. Hb: hemoglobin.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Text Representation</title>
        <p>The first choice en route to implementing a binary adverse event classifier is text representation. Traditionally, the BOW representation, which is based on the frequency of occurrence of individual words, has been used to support text classification. Given that multiple signs and symptoms, some of which can be adverse events, are commonly discussed in an SAE report, the BOW representation would make it difficult to distinguish adverse events from other signs and symptoms discussed within the same document as it does not preserve local context. In addition, the BOW representation is not robust with respect to the out-of-dictionary problem; that is, any classifier trained using this representation will not be able to use words that were previously not encountered in the training data.</p>
        <p>Word embedding can alleviate this problem. Word embedding is a mapping from the lexicosemantic space of words to the n-dimensional real-valued vector space. Methods such as word2vec [<xref ref-type="bibr" rid="ref42">42</xref>] and GloVe [<xref ref-type="bibr" rid="ref43">43</xref>] for learning word embeddings from large corpora rely on the hypothesis of distributional semantics, which claims that words occurring in similar contexts tend to convey similar meanings [<xref ref-type="bibr" rid="ref44">44</xref>]. In other words, these methods assume that the meaning of a word depends on its context, that is, the frequency of co-occurrence with other words within a text window. Consequently, word embeddings tend to arrange semantically related words in similar spatial patterns. Therefore, by mapping a word to its embedding, it becomes possible to model its semantics numerically and thus use arithmetic operations to reason about it. This property is effectively used by NNs in which text is passed through a series of layers that each combines and transforms embeddings to eventually derive an output such as a class label in text classification or an answer in question answering.</p>
        <p>Context-free word-embedding models such as word2vec [<xref ref-type="bibr" rid="ref42">42</xref>] and GloVe [<xref ref-type="bibr" rid="ref43">43</xref>] generate a single embedding for each word, making it impossible to differentiate between homonyms in the corresponding vector space. For example, the word <italic>mole</italic> would have a single embedding regardless of its many different meanings. Context-sensitive word-embedding models such as BERT [<xref ref-type="bibr" rid="ref31">31</xref>] generate an embedding for each word based on the surrounding words. For example, the word <italic>mole</italic> used as <italic>a unit of measurement</italic> and <italic>a disorder that affects the soft tissue</italic> will have different representations in the word-embedding space.</p>
        <p>BERT [<xref ref-type="bibr" rid="ref31">31</xref>] is a transformer-based language model that captures contextual relationships in a bidirectional way. A transformer [<xref ref-type="bibr" rid="ref45">45</xref>] is an encoder–decoder NN architecture that uses attention mechanisms to forward a holistic interpretation of a sequence to the decoder simultaneously rather than sequentially, as is the case in recurrent NNs such as LSTM and gated recurrent units. For each word, which is represented by its embedding, the self-attention layer considers other words, including their positions, in the same sentence to improve its encoding. As a workaround for the self-attention issue, BERT uses masked language modeling, that is, hides a certain percentage of the words using a special token [MASK] and uses their position to infer these words. The context-sensitive nature of BERT embeddings makes this language model perfectly suited for practical implementation of the classification rationale described earlier. In addition, BERT uses WordPiece tokenization to obtain subword units by applying a greedy segmentation algorithm to minimize the number of WordPieces in the training corpus [<xref ref-type="bibr" rid="ref46">46</xref>]. This implies that the downstream classification model may be able to use the word morphology.</p>
      </sec>
      <sec>
        <title>Classification Model</title>
        <p>The masked language modeling was 1 of the 2 tasks on which BERT was trained simultaneously. The second task was the next sentence prediction. In addition to [MASK], BERT uses 2 other special tokens for fine-tuning and specific task training: (1) a classification token [CLS], which indicates the beginning of a sequence and is commonly used for classification tasks (the output associated with this token is used for the next sentence prediction task); and (2) a sequence delimiter token [SEP], which indicates the end of a segment.</p>
        <p>The embedding layer shown in <xref rid="figure9" ref-type="fig">Figure 9</xref> illustrates the input format that BERT expects. Each token’s vocabulary identifier is mapped to a token embedding that is learned during training. Next, a binary vector is used to differentiate between 2 text segments, typically sentences. The type of segment depends on a specific task; for example, in question answering, both the question and the reference text could be appended and separated by a special delimiter token [SEP]. In our model, we chose the anchored concept as one segment and its context (ie, the whole document) as another. The binary vector was mapped to a segment embedding using a lookup table, which was learned during training. Finally, local token positions were mapped to positional embeddings using a lookup table, which was updated during training.</p>
        <p>The 3 types of embeddings were added and fed into the pretrained BERT<sub>BASE</sub> model, which comprises 12 layers of transformer encoders, each having a hidden size of 768 and 12 attention heads. Each layer produces a token-specific output, which can be used as its (contextualized) embedding. Similar to binary classification tasks described in [<xref ref-type="bibr" rid="ref31">31</xref>], the final transformer output corresponding to the special [CLS] token was taken as an aggregate problem representation, that is, pooled output, and passed on to the classification layer after a 0.1 dropout, which was used to reduce overfitting.</p>
        <p>The classification layer reduced the size of the pooled output from 768 to 2, which corresponds to the log-odds (or logits) of the classification output with respect to the question of whether the given concept was an adverse event or not. In contrast to the network up to that point, the classification layer was not pretrained. Instead, the corresponding weights were learned during BERT fine-tuning. As suggested in the study by Devlin et al [<xref ref-type="bibr" rid="ref31">31</xref>], the weights were initialized using a truncated normal distribution with mean 0 (SD 0.02). A softmax function was then applied to obtain the probability distribution of the 2 classes. The loss function (softmax cross entropy between the logits and the class labels) was optimized using the Adam optimizer with an initial learning rate of 2×10<sup>–5</sup>, which was chosen without any fine-tuning, based on the values suggested in the study by Devlin et al [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        <p>The classification model was trained for 8 epochs. This hyperparameter was preselected without any tuning. In each epoch, the training data were looped over in batches of 8 samples. The batch size was limited by memory. All other parameters were kept identical to those in the original BERT<sub>BASE</sub> uncased model, including the clip norm of 1.0, and linear warmup (100 warmup steps with linear decay of learning rate). The system was implemented in TensorFlow [<xref ref-type="bibr" rid="ref47">47</xref>], an open-source software library for machine learning, with a particular focus on training and inference of deep NNs, using the GeForce RTX 2080 (Nvidia Corp) graphics processing unit to accelerate deep learning.</p>
        <fig id="figure9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>Architecture based on Bidirectional Encoder Representations from Transformers (BERT) for classification of adverse events. CLS: classification token; SEP: sequence delimiter token.</p>
          </caption>
          <graphic xlink:href="medinform_v9i12e28632_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>During preprocessing, MetaMap was used to extract adverse event candidates. MetaMap failed to extract a total of 118 adverse events from the ground truth. Therefore, these instances automatically constituted FNs. The remaining 995 adverse event candidates extracted by MetaMap were passed on to the BERT-based classification model shown in <xref rid="figure9" ref-type="fig">Figure 9</xref>. To understand the performance of the BERT classifier, we first focused only on these 995 adverse event candidates before amalgamating them with 118 FNs. Of the 995 candidates, 659 (66.2%) were positive instances (ie, regarded as adverse events in the ground truth), and 336 (33.8%) were negative instances (ie, not regarded as adverse events in the ground truth).</p>
      <p>We performed 10 independent 5-fold cross-validations to evaluate the performance of the classification model. In other words, during each cross-validation, 20% of the documents were held out for evaluation, whereas the remaining 80% were used for training, and this was done 5 times in a row, each time using a different fold for evaluation. More specifically, for each of the 10 independent runs, we did the following:</p>
      <p>The 286 unique document identifiers were first shuffled randomly and then split into 5 folds. Remember that each document may have contained multiple adverse event candidates, and a separate copy was created for each candidate during preprocessing. All copies of the same document shared the same document identifier; hence, there was no overlap of data across the folds. As the splitting was done by document irrespective of the number of events they contained, the actual number of samples (ie, potential adverse events identified by MetaMap) in each fold may vary. We looped over the folds, each time using a different fold for evaluation and the remaining 4 folds for training. Each time, we measured P, R, and F1 scores. Once each of the 5 folds was used for evaluation, we calculated the mean values obtained for each evaluation measure. Finally, these values were averaged over 10 independent runs.</p>
      <p>The same cross-validation process was applied to the baseline approach. Remember that the goal of our system was to code adverse events against the UMLS; therefore, a UMLS lookup was inevitable. The lookup itself could be performed as the first step to identify an adverse event candidate (and code it at the same time) and then classify it. Alternatively, it could be performed as the last step to code an adverse event, which was first extracted from free text. In the former approach, we were dealing with a binary classification problem where it needed to be determined whether a given UMLS concept was an adverse event or not. In the latter approach, we were dealing with a sequence labeling problem where the boundaries of a token sequence that referred to an adverse event needed to be determined. This is how Du et al [<xref ref-type="bibr" rid="ref32">32</xref>] approached the extraction of adverse events from safety reports by framing it as the NER problem and fine-tuning BERT for this task. We reimplemented and cross-validated their approach on our data set to establish the baseline. Although the authors originally used BERT for biomedical text mining (BioBERT) [<xref ref-type="bibr" rid="ref48">48</xref>], we replaced it with BERT in our experiments to make their approach directly comparable with ours. The results achieved by the 2 contrasting approaches are presented in <xref ref-type="table" rid="table4">Table 4</xref>. Despite the similarities in the underlying technologies, we can observe a notable difference in the performance of the 2 approaches, most prominently in terms of P, where we can see an improvement of approximately 30 percentage points over the baseline. A detailed analysis of this phenomenon is provided in the <italic>Discussion</italic> section. In this section, we proceed to describe the results achieved using our own approach.</p>
      <p><xref rid="figure10" ref-type="fig">Figure 10</xref> displays the distribution of the prediction probabilities. The histogram combines the predictions from all folds used for cross-validation. We can observe that most prediction probabilities are concentrated around the 2 extremes, 0 and 1, which suggests that the classification model is able to make clear-cut decisions, as it does not depend on a specific threshold.</p>
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>Evaluation results.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="100"/>
          <col width="470"/>
          <col width="430"/>
          <thead>
            <tr valign="top">
              <td>Parameters</td>
              <td>Baseline approach: named entity recognition (BERT<sup>a</sup>)+concept extraction (MetaMap), mean (SD)</td>
              <td>Our approach: concept extraction (MetaMap)+classification (BERT), mean (SD)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Precision</td>
              <td>0.5715 (0.0076)</td>
              <td>0.8638 (0.0057)</td>
            </tr>
            <tr valign="top">
              <td>Recall</td>
              <td>0.7116 (0.0096)</td>
              <td>0.7604 (0.0121)</td>
            </tr>
            <tr valign="top">
              <td>F1 score</td>
              <td>0.6335 (0.0072)</td>
              <td>0.8080 (0.0071)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table4fn1">
            <p><sup>a</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <fig id="figure10" position="float">
        <label>Figure 10</label>
        <caption>
          <p>Distribution of prediction probabilities for all folds in a cross-validation experiment.</p>
        </caption>
        <graphic xlink:href="medinform_v9i12e28632_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>In <xref rid="figure11" ref-type="fig">Figure 11</xref>, we used receiver operating characteristic curves to illustrate the diagnostic ability of the classification model. A separate curve was provided for each of the 5 folds used for cross-validation. The plot shows the TP rate versus the FP rate at each classification threshold. The solid-colored lines correspond to the model’s performance, whereas the gray dashed line represents the performance of a classifier with no skill, that is, the one that always predicts the majority class. An ideal model would result in a curve that bows toward the coordinate (0,1). With its curve consistently lying close to the top-left corner, our model demonstrated very good classification performance. We summarized the receiver operating characteristic results by calculating the area under the curve to measure the ability of our model to distinguish between the 2 classes, with higher values indicating better performance. With an overall mean score of 0.8789 (SD 0.0101) and a range between 0 and 1, our model was clearly able to distinguish between adverse events and underlying conditions 87.79% of the time on average.</p>
      <p>Finally, to account for the class imbalance, we also looked at the precision-recall (PR) curve shown in <xref rid="figure12" ref-type="fig">Figure 12</xref>. Again, the solid-colored lines correspond to our model’s performance, whereas the gray dashed horizontal line corresponds to a model with no skill, that is, a model whose P is equal to the proportion of positive samples. The PR curve of our model was relatively close to that of an ideal model, whose curve would bow toward the coordinate (1,1). In comparison to a no skill model, which would achieve a PR area under the curve score of 0.6533, our model reached a high score of 0.9108 (SD 0.0103), demonstrating its ability to correctly classify adverse events despite the class imbalance.</p>
      <fig id="figure11" position="float">
        <label>Figure 11</label>
        <caption>
          <p>Receiver operating characteristic curve for each fold in a cross-validation experiment.</p>
        </caption>
        <graphic xlink:href="medinform_v9i12e28632_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure12" position="float">
        <label>Figure 12</label>
        <caption>
          <p>Precision-recall curve for each fold in a cross-validation experiment.</p>
        </caption>
        <graphic xlink:href="medinform_v9i12e28632_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Previously, we provided details on calculating the interannotator agreement using P, R, and F1 score. When a system is evaluated against the ground truth, the corresponding values establish the human performance baseline, which in this case were P=0.9370, R=0.8836, and F1=0.9095. If we compare these values against the results provided in <xref ref-type="table" rid="table4">Table 4</xref>, we can observe a 10.15 percent points difference in the F1 score. In particular, we notice that the system’s R is 10.34 percent points lower than its P. There are 2 potential sources of type 2 errors in the system. Remember that the system first uses MetaMap to identify potential adverse events, which are then classified by BERT as positive or negative. Both components can give rise to FN results. First, any adverse event that MetaMap failed to forward to BERT would have been automatically counted as an FN. Second, any adverse event that MetaMap did supply to BERT for further classification could have still ended in an FN. MetaMap is a predefined rule-based system, and as such, its performance within our system is limited by external factors. BERT, on the other hand, has been trained for a specific task using the data set described here. Therefore, it is worth focusing specifically on its classification performance.</p>
        <p>To evaluate how well BERT learned to classify adverse events, we removed those FNs from the ground truth that were never actually classified by BERT because of MetaMap failing to identify them in the first place. <xref ref-type="table" rid="table5">Table 5</xref> provides the cross-validation results for BERT’s performance alone. We observe that the classification performance alone is much closer to the human performance baseline, lagging behind the F1 score by only 2.93 percent points.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Bidirectional Encoder Representations from Transformers’ (BERT) performance.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="490"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Parameters</td>
                <td>Named entity recognition (BERT), mean (SD)</td>
                <td>Classification (BERT), mean (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Precision</td>
                <td>0.7484 (0.0066)</td>
                <td>0.8651 (0.0053)</td>
              </tr>
              <tr valign="top">
                <td>Recall</td>
                <td>0.8237 (0.0086)</td>
                <td>0.8974 (0.0104)</td>
              </tr>
              <tr valign="top">
                <td>F1 score</td>
                <td>0.7835 (0.0053)</td>
                <td>0.8802 (0.0044)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>If we now compare BERT’s classification performance given in <xref ref-type="table" rid="table5">Table 5</xref> with the overall system performance given in <xref ref-type="table" rid="table4">Table 4</xref>, we can see that the P is virtually identical (0.8638 vs 0.8651), whereas R differs by 13.70 percent points (0.7604 vs 0.8974). Hence, we can conclude that the R of the overall system is primarily limited by MetaMap’s performance, which naturally raises the question of whether its use as a preprocessing step within our system was appropriate. The baseline method uses MetaMap as the postprocessing step; therefore, we investigated the extent of its effect on the overall performance by singling out BERT’s performance on the NER task, which was evaluated using the exact matching of phrases annotated in the ground truth. If we compare the first column of <xref ref-type="table" rid="table5">Table 5</xref> with the second column of <xref ref-type="table" rid="table4">Table 4</xref>, we can observe that without MetaMap, BERT can certainly achieve higher R (0.8237 vs 0.7604) when it is allowed to determine the phrase boundaries on its own rather than having them prescribed by MetaMap.</p>
        <p>Although such an approach is unarguably more flexible, it can also have a negative impact when the goal of the system is to code adverse events rather than only recognize their mentions in the text. If the phrase boundaries are not correctly detected as part of the NER task, then searching the UMLS using an incorrectly extracted phrase may provide an incorrect code. Consider, for example, 2 adverse events, <italic>respiratory tract infection</italic> (whose code in the UMLS is C0035243) and <italic>urinary tract infection</italic> (whose code is C0042029). Suppose that a system failed to correctly identify their boundaries, for example, by suggesting <italic>tract infection</italic> in both cases. The UMLS has no concept referring to <italic>tract infection</italic>; therefore, MetaMap would at best suggest <italic>infection</italic> (whose code is C3714514) as the closest concept matching the given search term, thus incorrectly coding both <italic>respiratory tract infection</italic> and <italic>urinary tract infection</italic>, resulting in 2 FNs (labeled C0035243 and C0042029 in the ground truth) and 2 FPs (both labeled C3714514 by the system). On the other hand, MetaMap can be configured to recognize the longest phrases from relevant semantic types and, in that way, impose tighter control of the process, reducing the number of both FPs and FNs. Although MetaMap may limit R, it does play an important role in controlling the P in our proposed approach, as the results in <xref ref-type="table" rid="table4">Table 4</xref> clearly depict. Nonetheless, MetaMap could benefit from revising its rule-based dictionary lookup approach in light of the new advances in text mining and, in particular, deep learning approaches to bring its performance in line with the state of the art.</p>
        <p>Focusing on BERT’s performance alone in <xref ref-type="table" rid="table5">Table 5</xref>, we can see that it performs better on the binary classification task than on the NER task. This is not surprising, as the sequence labeling task is inherently more complex than binary classification: the number of possible sequences grows exponentially with the length of a document. In particular, the performance gap is bound to widen when training the corresponding models on a relatively small data set, as is the case in this study. Having &#60;300 annotated documents available, we can see from <xref ref-type="table" rid="table5">Table 5</xref> that BERT’s performance on the classification task is in the high 80s across all metrics, whereas its performance on the NER task is in the high 70s overall. This again justifies our choice to run BERT after MetaMap rather than the other way around.</p>
        <p>Going back to BERT’s classification performance provided in <xref ref-type="table" rid="table5">Table 5</xref>, while examining the misclassified examples, we noticed some patterns. Some simple negation patterns were not captured by the classifier. For example, in the document containing the sentence “Chest X-ray showed no new lesion, no pleural effusion disorder or pneumothorax and history of smoking,” both <italic>pleural effusion disorder</italic> and <italic>pneumothorax</italic> were misclassified as adverse events. Similarly, in the document with the sentence “admitted with right scaptula/back pain, no chest pain or dyspnea,” both <italic>chest pain</italic> and <italic>dyspnea</italic> were misclassified as adverse events.</p>
        <p>This finding is in line with the current evidence that neural models struggle to generalize negation to out-of-sample data sets, even within the same domain [<xref ref-type="bibr" rid="ref49">49</xref>]. The generalizability of negation remains a challenge, as none of the factors considered, including the annotation guidelines, the amount of data available, and their lexical and syntactic properties, fully explained the poor performance [<xref ref-type="bibr" rid="ref50">50</xref>]. Empirical evidence suggests that the use of domain-specific embeddings such as BioBERT [<xref ref-type="bibr" rid="ref48">48</xref>] may improve negation detection [<xref ref-type="bibr" rid="ref51">51</xref>]. BERT can also be fine-tuned to support the negation detection task in clinical text [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]; however, this requires data to be annotated specifically for this task. Nonetheless, manual adaptation, be it rule modification or in-domain data annotation, remains a recommended strategy for optimizing performance in clinical natural language processing [<xref ref-type="bibr" rid="ref50">50</xref>]. Rule-based systems for negation detection such as ConText [<xref ref-type="bibr" rid="ref53">53</xref>] seem to transfer well within a domain [<xref ref-type="bibr" rid="ref54">54</xref>]. Therefore, the simplest and most effective way of addressing negation as the source of errors in our proposed framework would be to use the ConText algorithm [<xref ref-type="bibr" rid="ref53">53</xref>] to detect negated contexts and automatically exclude them from further consideration.</p>
        <p>Some words, such as the word <italic>decreased</italic>, can have the opposite effect depending on the context in which they are used. For example, <italic>decreased mobility</italic> implies a negative effect, whereas <italic>decreased pain</italic> implies a positive effect and not an adverse event. The system was not able to differentiate between such contexts. This could be remedied by incorporating domain knowledge about candidate adverse events. Alternatively, with a larger training data set, these properties could be learned directly from the data.</p>
        <p>Finally, the classification model struggled when a given concept was used in multiple contexts. For example, for the concept <italic>infection</italic> in the document extract “admitted to hospital with lower respiratory tract infection [...] not commenced chemotherapy related infection,” the model misinterpreted the latter mention as a negated one and, consequently, misclassified this adverse event.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study established the feasibility of automated coding of adverse events described in the narrative section of the SAE reports. This, in turn, enables statistical analysis of adverse events and the patterns of such events so that any correlations with the use of medicines can be estimated in a timely fashion. An easy adaptation of an existing deep learning architecture trained on a relatively small data set demonstrates that similar tools can be built rapidly. In addition, the evaluation results show that such tools also perform with high accuracy. This performance can be attributed to the choice of the method. BERT is already pretrained on a large unlabeled corpus, which allows it to be fine-tuned on a small, labeled corpus for a specialized task. This is particularly relevant for clinical text mining applications, where the data annotation bottleneck has been identified as one of the key obstacles to machine learning approaches for clinical text mining [<xref ref-type="bibr" rid="ref55">55</xref>].</p>
        <p>Unfortunately, the relevant data are still mainly handwritten, which means that they cannot be immediately processed in the way proposed in this study. There are 2 ways in which this issue can be addressed. First, we can work with the stakeholders to change the policy on the means of collecting information on SAEs, for example, by transcribing the notes when they reach the safety and pharmacovigilance teams in the central trial unit, by requiring them to be typed, or by using some combination of these 2 approaches.</p>
        <p>Alternatively, we can propose to develop methods to digitize handwritten notes automatically using tools such as Transkribus [<xref ref-type="bibr" rid="ref56">56</xref>], which have been designed to digitize historical documents and allow the training of specific text recognition models. This would have a great potential for impact on safety by digitizing and mining legacy data from previous trials, where some medicinal products may have already reached the market, thus exposing the population to previously overlooked safety concerns. Currently, these issues prevent a systematic analysis of the information provided in the narrative of SAE reports, hence missing an opportunity to identify potential safety signals.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BioBERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers for biomedical text mining</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BOW</term>
          <def>
            <p>bag of words</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CTR</term>
          <def>
            <p>Center for Trials Research</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CTU</term>
          <def>
            <p>clinical trial unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">CUI</term>
          <def>
            <p>concept unique identifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NN</term>
          <def>
            <p>neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PR</term>
          <def>
            <p>precision-recall</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">SAE</term>
          <def>
            <p>serious adverse event</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors are thankful to Kelly Gee for providing advice and guidance on regulatory expectations and best practices for safeguarding patient safety in clinical trials. This study was funded by the Engineering and Physical Sciences Research Council via the Healthcare Text Analytics Network (HealTex), grant number EP/N027280/1. The Centre for Trials Research receives infrastructure funding from the Health and Care Research Wales and Cancer Research United Kingdom.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>This study was conceptualized by IS; its methodology was developed by IS, DC, MST, and PC. The software work was handled by DC, IS, and PC. The validation was performed by NA, CJ, and MB; the investigations were done by IS and MB; resources were collected by IS and MB; and the data were curated by NA and IS. The original draft of this paper was prepared by IS, and it was reviewed and edited by IS, DC, MST, PC, and MB. The visualizations were created by IS and DC. The study was supervised by IS and MST, with project administration performed by CJ and funding acquisition managed by IS and MB. All authors have read and agreed to the published version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Data Mining at FDA - White Paper</article-title>
          <source>US Food and Drug Administration</source>
          <year>2018</year>
          <access-date>2021-12-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/science-research/data-mining/data-mining-fda-white-paper">https://www.fda.gov/science-research/data-mining/data-mining-fda-white-paper</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Plasek</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Montecalvo</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing and its implications for the future of medication safety: a narrative review of recent advances and challenges</article-title>
          <source>Pharmacotherapy</source>
          <year>2018</year>
          <month>08</month>
          <day>22</day>
          <volume>38</volume>
          <issue>8</issue>
          <fpage>822</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.1002/phar.2151</pub-id>
          <pub-id pub-id-type="medline">29884988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Botsis</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Markatou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Text mining for the Vaccine Adverse Event Reporting System: medical text classification using informative feature selection</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>631</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21709163"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2010-000022</pub-id>
          <pub-id pub-id-type="medline">21709163</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2010-000022</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chee</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Berlin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schatz</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Predicting adverse drug events from personal health messages</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>217</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22195073"/>
          </comment>
          <pub-id pub-id-type="medline">22195073</pub-id>
          <pub-id pub-id-type="pmcid">PMC3243174</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Botsis</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Buttolph</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Winiecki</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Vaccine adverse event text mining system for extracting features from vaccine safety reports</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>6</issue>
          <fpage>1011</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22922172"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2012-000881</pub-id>
          <pub-id pub-id-type="medline">22922172</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2012-000881</pub-id>
          <pub-id pub-id-type="pmcid">PMC3534466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pamer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Proestel</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Development of an automated assessment tool for MedWatch reports in the FDA adverse event reporting system</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>09</month>
          <day>01</day>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>913</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28371826"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx022</pub-id>
          <pub-id pub-id-type="medline">28371826</pub-id>
          <pub-id pub-id-type="pii">3076824</pub-id>
          <pub-id pub-id-type="pmcid">PMC7651970</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Iqbal</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mallah</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rhodes</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Romero</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dzahini</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Pandey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Broadbent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>ZM</given-names>
            </name>
          </person-group>
          <article-title>ADEPt, a semantically-enriched pipeline for extracting adverse drug events from free-text electronic health records</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <month>11</month>
          <day>09</day>
          <volume>12</volume>
          <issue>11</issue>
          <fpage>e0187121</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0187121"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0187121</pub-id>
          <pub-id pub-id-type="medline">29121053</pub-id>
          <pub-id pub-id-type="pii">PONE-D-17-16584</pub-id>
          <pub-id pub-id-type="pmcid">PMC5679515</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tonning</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Overview of the TAC 2017 adverse reaction extraction from drug labels track</article-title>
          <source>Proceedings of the Text Analysis Conference (TAC)</source>
          <year>2017</year>
          <conf-name>Proceedings of the Text Analysis Conference (TAC)</conf-name>
          <conf-date>Nov 13-14, 2017</conf-date>
          <conf-loc>Gaithersburg, Maryland, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dblp.org/rec/conf/tac/RobertsDT17.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ginn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pharmacovigilance from social media: mining adverse drug reaction mentions using sequence labeling with word embedding cluster features</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>05</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>671</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25755127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocu041</pub-id>
          <pub-id pub-id-type="medline">25755127</pub-id>
          <pub-id pub-id-type="pii">ocu041</pub-id>
          <pub-id pub-id-type="pmcid">PMC4457113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cocos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fiks</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Masino</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for pharmacovigilance: recurrent neural network architectures for labeling adverse drug reactions in Twitter posts</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>07</month>
          <day>01</day>
          <volume>24</volume>
          <issue>4</issue>
          <fpage>813</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28339747"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw180</pub-id>
          <pub-id pub-id-type="medline">28339747</pub-id>
          <pub-id pub-id-type="pii">3041102</pub-id>
          <pub-id pub-id-type="pmcid">PMC7651964</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Deep learning approaches for extracting adverse events and indications of dietary supplements from clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>03</month>
          <day>01</day>
          <volume>28</volume>
          <issue>3</issue>
          <fpage>569</fpage>
          <lpage>77</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33150942"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa218</pub-id>
          <pub-id pub-id-type="medline">33150942</pub-id>
          <pub-id pub-id-type="pii">5956340</pub-id>
          <pub-id pub-id-type="pmcid">PMC7936508</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Friedlin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>ADESSA: a real-time decision support service for delivery of semantically coded adverse drug event data</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2010</year>
          <month>11</month>
          <day>13</day>
          <volume>2010</volume>
          <fpage>177</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21346964"/>
          </comment>
          <pub-id pub-id-type="medline">21346964</pub-id>
          <pub-id pub-id-type="pmcid">PMC3041415</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Combi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zorzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pozzani</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Arzenton</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Moretti</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Normalizing spontaneous reports into MedDRA: some experiments with MagiCoder</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2019</year>
          <month>01</month>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>95</fpage>
          <lpage>102</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2018.2861213</pub-id>
          <pub-id pub-id-type="medline">30059326</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Emadzadeh</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Hybrid semantic analysis for mapping adverse drug reaction mentions in tweets to medical terminology</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2018</year>
          <month>04</month>
          <day>16</day>
          <volume>2017</volume>
          <fpage>679</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29854133"/>
          </comment>
          <pub-id pub-id-type="medline">29854133</pub-id>
          <pub-id pub-id-type="pmcid">PMC5977584</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>GH</given-names>
            </name>
          </person-group>
          <article-title>Pattern mining for extraction of mentions of adverse drug reactions from user comments</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>1019</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22195162"/>
          </comment>
          <pub-id pub-id-type="medline">22195162</pub-id>
          <pub-id pub-id-type="pmcid">PMC3243273</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Portable automatic text classification for adverse drug reaction detection via multi-corpus training</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>02</month>
          <volume>53</volume>
          <fpage>196</fpage>
          <lpage>207</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00231-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.11.002</pub-id>
          <pub-id pub-id-type="medline">25451103</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00231-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4355323</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>An ensemble method for extracting adverse drug events from social media</article-title>
          <source>Artif Intell Med</source>
          <year>2016</year>
          <month>06</month>
          <volume>70</volume>
          <fpage>62</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2016.05.004</pub-id>
          <pub-id pub-id-type="medline">27431037</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(15)30037-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Markatou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Active computerized pharmacovigilance using natural language processing, statistics, and electronic health records: a feasibility study</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2009</year>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>328</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19261932"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M3028</pub-id>
          <pub-id pub-id-type="medline">19261932</pub-id>
          <pub-id pub-id-type="pii">M3028</pub-id>
          <pub-id pub-id-type="pmcid">PMC2732239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Skentzos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shubina</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Plutzky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Turchin</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Structured vs. unstructured: factors affecting adverse drug reaction documentation in an EMR repository</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>1270</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22195188"/>
          </comment>
          <pub-id pub-id-type="medline">22195188</pub-id>
          <pub-id pub-id-type="pmcid">PMC3243255</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hazlehurst</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Naleway</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mullooly</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Detecting possible vaccine adverse events in clinical notes of the electronic medical record</article-title>
          <source>Vaccine</source>
          <year>2009</year>
          <month>03</month>
          <day>23</day>
          <volume>27</volume>
          <issue>14</issue>
          <fpage>2077</fpage>
          <lpage>83</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2009.01.105</pub-id>
          <pub-id pub-id-type="medline">19428833</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(09)00168-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Negi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pavuri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A novel method for drug-adverse event extraction using machine learning</article-title>
          <source>Inform Med Unlocked</source>
          <year>2019</year>
          <volume>17</volume>
          <fpage>100190</fpage>
          <pub-id pub-id-type="doi">10.1016/j.imu.2019.100190</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kao Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chiang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Detecting potential adverse drug reactions using a deep neural network model</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>02</month>
          <day>06</day>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>e11016</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/2/e11016/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/11016</pub-id>
          <pub-id pub-id-type="medline">30724742</pub-id>
          <pub-id pub-id-type="pii">v21i2e11016</pub-id>
          <pub-id pub-id-type="pmcid">PMC6381404</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Filannino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Buchan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Arora</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Extracting and normalizing adverse drug reactions from drug labels</article-title>
          <source>Semantic Scholar</source>
          <access-date>2021-12-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.semanticscholar.org/paper/Extracting-and-Normalizing-Adverse-Drug-Reactions-Tao-Lee/4fe1095a50731d74dee6ef93c699c81de744496f#citing-papers">https://tinyurl.com/bdetz4dw</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cocos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Masino</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Combining rule-based and neural network systems for extracting adverse reactions from drug labels</article-title>
          <source>Proceedings of the 2017 Text Analysis Conference, TAC 2017</source>
          <year>2017</year>
          <conf-name>Proceedings of the 2017 Text Analysis Conference, TAC 2017</conf-name>
          <conf-date>Nov 13-14, 2017</conf-date>
          <conf-loc>Gaithersburg, Maryland, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tac.nist.gov/publications/2017/participant.papers/TAC2017.CHOP.proceedings.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Belousov</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Milosevic</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dixon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Extracting adverse drug reactions and their context using sequence labelling ensembles in TAC2017</article-title>
          <source>Proceedings of the 2017 Text Analysis Conference, TAC 2017</source>
          <year>2019</year>
          <conf-name>Proceedings of the 2017 Text Analysis Conference, TAC 2017</conf-name>
          <conf-date>Nov 13-14, 2017</conf-date>
          <conf-loc>Gaithersburg, Maryland, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dandala</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mahajan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Devarakonda</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>IBM Research system at TAC 2017: adverse drug reactions extraction from drug labels</article-title>
          <source>Proceedings of the 2017 Text Analysis Conference, TAC 2017</source>
          <year>2017</year>
          <conf-name>Proceedings of the 2017 Text Analysis Conference, TAC 2017</conf-name>
          <conf-date>Nov 13-14, 2017</conf-date>
          <conf-loc>Gaithersburg, Maryland, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>BUPT-PRIS system for TAC 2017 event nugget detection, event argument linking and ADR tracks</article-title>
          <source>Proceedings of the 2017 Text Analysis Conference, TAC 2017</source>
          <year>2017</year>
          <conf-name>Proceedings of the 2017 Text Analysis Conference, TAC 2017</conf-name>
          <conf-date>Nov 13-14, 2017</conf-date>
          <conf-loc>Gaithersburg, Maryland, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tiftikci</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Özgür</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hur</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Extracting adverse drug reactions using deep learning and dictionary based approaches</article-title>
          <source>Proceedings of the 2017 Text Analysis Conference, TAC 2017</source>
          <year>2017</year>
          <conf-name>Proceedings of the 2017 Text Analysis Conference, TAC 2017</conf-name>
          <conf-date>Nov 13-14, 2017</conf-date>
          <conf-loc>Gaithersburg, Maryland, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H-J</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>UTH CCB system for adverse drug reaction extraction from drug labels at TAC-ADR 2017</article-title>
          <source>Proceedings of the 2017 Text Analysis Conference, TAC 2017</source>
          <year>2017</year>
          <conf-name>Proceedings of the 2017 Text Analysis Conference, TAC 2017</conf-name>
          <conf-date>Nov 13-14, 2017</conf-date>
          <conf-loc>Gaithersburg, Maryland, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pawar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Palshikar</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharyya</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ramrakhiyani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Varma</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>TCS Research at TAC 2017: Joint extraction of entities and relations from drug labels using an ensemble of neural networks</article-title>
          <source>Proceedings of the 2017 Text Analysis Conference, TAC 2017</source>
          <year>2017</year>
          <conf-name>Proceedings of the 2017 Text Analysis Conference, TAC 2017</conf-name>
          <conf-date>Nov 13-14, 2017</conf-date>
          <conf-loc>Gaithersburg, Maryland, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <fpage>4805</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1810.04805.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sankaranarayanapillai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Si</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Extracting postmarketing adverse events from safety reports in the vaccine adverse event reporting system (VAERS) using deep learning</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>07</month>
          <day>14</day>
          <volume>28</volume>
          <issue>7</issue>
          <fpage>1393</fpage>
          <lpage>400</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab014</pub-id>
          <pub-id pub-id-type="medline">33647938</pub-id>
          <pub-id pub-id-type="pii">6153955</pub-id>
          <pub-id pub-id-type="pmcid">PMC8279785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>Herr</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Berendsen</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Jonnalagadda</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Carson</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Starren</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing for EHR-based pharmacovigilance: a structured review</article-title>
          <source>Drug Saf</source>
          <year>2017</year>
          <month>11</month>
          <volume>40</volume>
          <issue>11</issue>
          <fpage>1075</fpage>
          <lpage>89</lpage>
          <pub-id pub-id-type="doi">10.1007/s40264-017-0558-6</pub-id>
          <pub-id pub-id-type="medline">28643174</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-017-0558-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Neves</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Leser</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>A survey on annotation tools for the biomedical literature</article-title>
          <source>Brief Bioinform</source>
          <year>2014</year>
          <month>03</month>
          <day>18</day>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>327</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1093/bib/bbs084</pub-id>
          <pub-id pub-id-type="medline">23255168</pub-id>
          <pub-id pub-id-type="pii">bbs084</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tomanek</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Timed annotations: enhancing MUC7 metadata by the time it takes to annotate named entities</article-title>
          <year>2009</year>
          <conf-name>Proceedings of the Linguistic Annotation Workshop</conf-name>
          <conf-date>2009</conf-date>
          <conf-loc>Suntec, Singapore</conf-loc>
          <fpage>112</fpage>
          <lpage>5</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deleger</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Lingren</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Molnar</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Stoutenborough</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kouril</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Marsolo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Solti</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Building gold standard corpora for medical natural language processing tasks</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2012</year>
          <volume>2012</volume>
          <fpage>144</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23304283"/>
          </comment>
          <pub-id pub-id-type="medline">23304283</pub-id>
          <pub-id pub-id-type="pmcid">PMC3540456</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rothschild</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Agreement, the f-measure, and reliability in information retrieval</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2005</year>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>296</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/15684123"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M1733</pub-id>
          <pub-id pub-id-type="medline">15684123</pub-id>
          <pub-id pub-id-type="pii">M1733</pub-id>
          <pub-id pub-id-type="pmcid">PMC1090460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>Database issue</issue>
          <fpage>D267</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Krzeminski</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Corcoran</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Balinsky</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Cohort selection for clinical trials from longitudinal patient records: text mining approach</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>10</month>
          <day>31</day>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>e15980</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/4/e15980/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/15980</pub-id>
          <pub-id pub-id-type="medline">31674914</pub-id>
          <pub-id pub-id-type="pii">v7i4e15980</pub-id>
          <pub-id pub-id-type="pmcid">PMC6913747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Lang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>An overview of MetaMap: historical perspective and recent advances</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>229</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20442139"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.002733</pub-id>
          <pub-id pub-id-type="medline">20442139</pub-id>
          <pub-id pub-id-type="pii">17/3/229</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sarafraz</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Medication information extraction with linguistic pattern matching and semantic rules</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>532</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20819858"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2010.003657</pub-id>
          <pub-id pub-id-type="medline">20819858</pub-id>
          <pub-id pub-id-type="pii">17/5/532</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995671</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>Adv Neur Inf Process Syst</source>
          <year>2013</year>
          <month>12</month>
          <volume>2</volume>
          <fpage>3111</fpage>
          <lpage>3119</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/2999792.2999959"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>GloVe: global vectors for word representation</article-title>
          <source>Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <year>2014</year>
          <conf-name>Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>ZS</given-names>
            </name>
          </person-group>
          <article-title>Distributional structure</article-title>
          <source>Word</source>
          <year>2015</year>
          <month>12</month>
          <day>04</day>
          <volume>10</volume>
          <issue>2-3</issue>
          <fpage>146</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1080/00437956.1954.11659520</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>ArXiv.org</source>
          <year>2017</year>
          <access-date>2021-12-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1706.03762">https://arxiv.org/abs/1706.03762</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Schuster</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Norouzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Macherey</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Google's neural machine translation system: bridging the gap between human and machine translation</article-title>
          <source>ArXiv.org</source>
          <year>2016</year>
          <access-date>2021-12-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1609.08144">https://arxiv.org/abs/1609.08144</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abadi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Barham</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Tensorflow: a system for large-scale machine learning</article-title>
          <source>Proceedings of the 12th USENIX conference on Operating Systems Design and Implementation</source>
          <year>2016</year>
          <conf-name>12th USENIX Symposium on Operating Systems Design and Implementation</conf-name>
          <conf-date>Nov 2-4, 2016</conf-date>
          <conf-loc>Savannah, GA, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grivas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alex</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grover</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tobin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Whiteley</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Not a cute stroke: analysis of rule- and neural network-based information extraction systems for brain radiology reports</article-title>
          <source>Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis at the Conference on Empirical Methods in Natural Language Processing</source>
          <year>2020</year>
          <conf-name>Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis at the Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.louhi-1.4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Coarr</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Halgrim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Carrell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Negation's not solved: generalizability versus optimizability in clinical natural language processing</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <month>11</month>
          <day>13</day>
          <volume>9</volume>
          <issue>11</issue>
          <fpage>e112774</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0112774"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0112774</pub-id>
          <pub-id pub-id-type="medline">25393544</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-09493</pub-id>
          <pub-id pub-id-type="pmcid">PMC4231086</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rivera Zavala</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The impact of pretrained language models on negation and speculation detection in cross-lingual medical text: comparative study</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>12</month>
          <day>03</day>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e18953</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/12/e18953/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18953</pub-id>
          <pub-id pub-id-type="medline">33270027</pub-id>
          <pub-id pub-id-type="pii">v8i12e18953</pub-id>
          <pub-id pub-id-type="pmcid">PMC7746498</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bethard</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dligach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sadeque</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Does BERT need domain adaptation for clinical negation detection?</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>04</month>
          <day>01</day>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>584</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32044989"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa001</pub-id>
          <pub-id pub-id-type="medline">32044989</pub-id>
          <pub-id pub-id-type="pii">5733888</pub-id>
          <pub-id pub-id-type="pmcid">PMC7075528</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harkema</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Thornblade</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
          </person-group>
          <article-title>ConText: an algorithm for determining negation, experiencer, and temporal status from clinical reports</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>10</month>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>839</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(09)00074-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2009.05.002</pub-id>
          <pub-id pub-id-type="medline">19435614</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(09)00074-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC2757457</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sykes</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Grivas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Grover</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tobin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sudlow</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Whiteley</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>McIntosh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Whalley</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Alex</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Comparison of rule-based and neural network models for negation detection in radiology reports</article-title>
          <source>Nat Lang Eng</source>
          <year>2020</year>
          <month>11</month>
          <day>18</day>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>203</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1017/s1351324920000509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Clinical text data in machine learning: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>03</month>
          <day>31</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>e17984</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/3/e17984/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17984</pub-id>
          <pub-id pub-id-type="medline">32229465</pub-id>
          <pub-id pub-id-type="pii">v8i3e17984</pub-id>
          <pub-id pub-id-type="pmcid">PMC7157505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kahle</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Colutto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hackl</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mühlberger</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Transkribus - A service platform for transcription, recognition and retrieval of historical documents</article-title>
          <source>Proceedings of the 14th IAPR International Conference on Document Analysis and Recognition (ICDAR)</source>
          <year>2017</year>
          <conf-name>14th IAPR International Conference on Document Analysis and Recognition (ICDAR)</conf-name>
          <conf-date>Nov 9-15, 2017</conf-date>
          <conf-loc>Kyoto, Japan</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icdar.2017.307</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
