<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v11i1e46348</article-id>
      <article-id pub-id-type="pmid">37097731</article-id>
      <article-id pub-id-type="doi">10.2196/46348</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Deep Learning Approach for Negation and Speculation Detection for Automated Important Finding Flagging and Extraction in Radiology Report: Internal Validation and Technique Comparison Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lin</surname>
            <given-names>Chun-Jung</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hu</surname>
            <given-names>Danqing</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhao</surname>
            <given-names>Peng</given-names>
          </name>
          <degrees>PhD</degrees>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kim</surname>
            <given-names>Seongsoon</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Weng</surname>
            <given-names>Kung-Hsun</given-names>
          </name>
          <degrees>MSc, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4207-1512</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Chung-Feng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6698-0273</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Chia-Jung</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Department of Information Systems</institution>
            <institution>Chi Mei Medical Center</institution>
            <addr-line>No.901, Zhonghua Rd.</addr-line>
            <addr-line>Yongkang Dist.</addr-line>
            <addr-line>Tainan, 71004</addr-line>
            <country>Taiwan</country>
            <phone>886 6 2812811 ext 52069</phone>
            <email>carolchen@mail.chimei.org.tw</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5364-7209</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Medical Imaging</institution>
        <institution>Chi Mei Medical Center, Chiali</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Medical Research</institution>
        <institution>Chi Mei Medical Center</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Information Systems</institution>
        <institution>Chi Mei Medical Center</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Chia-Jung Chen <email>carolchen@mail.chimei.org.tw</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>25</day>
        <month>4</month>
        <year>2023</year>
      </pub-date>
      <volume>11</volume>
      <elocation-id>e46348</elocation-id>
      <history>
        <date date-type="received">
          <day>8</day>
          <month>2</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>11</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>21</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>24</day>
          <month>3</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Kung-Hsun Weng, Chung-Feng Liu, Chia-Jung Chen. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 25.04.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2023/1/e46348" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Negation and speculation unrelated to abnormal findings can lead to false-positive alarms for automatic radiology report highlighting or flagging by laboratory information systems.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This internal validation study evaluated the performance of natural language processing methods (NegEx, NegBio, NegBERT, and transformers).</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We annotated all negative and speculative statements unrelated to abnormal findings in reports. In experiment 1, we fine-tuned several transformer models (ALBERT [A Lite Bidirectional Encoder Representations from Transformers], BERT [Bidirectional Encoder Representations from Transformers], DeBERTa [Decoding-Enhanced BERT With Disentangled Attention], DistilBERT [Distilled version of BERT], ELECTRA [Efficiently Learning an Encoder That Classifies Token Replacements Accurately], ERNIE [Enhanced Representation through Knowledge Integration], RoBERTa [Robustly Optimized BERT Pretraining Approach], SpanBERT, and XLNet) and compared their performance using precision, recall, accuracy, and <italic>F</italic><sub>1</sub>-scores. In experiment 2, we compared the best model from experiment 1 with 3 established negation and speculation-detection algorithms (NegEx, NegBio, and NegBERT).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our study collected 6000 radiology reports from 3 branches of the Chi Mei Hospital, covering multiple imaging modalities and body parts. A total of 15.01% (105,755/704,512) of words and 39.45% (4529/11,480) of important diagnostic keywords occurred in negative or speculative statements unrelated to abnormal findings. In experiment 1, all models achieved an accuracy of &#62;0.98 and <italic>F</italic><sub>1</sub>-score of &#62;0.90 on the test data set. ALBERT exhibited the best performance (accuracy=0.991; <italic>F</italic><sub>1</sub>-score=0.958). In experiment 2, ALBERT outperformed the optimized NegEx, NegBio, and NegBERT methods in terms of overall performance (accuracy=0.996; <italic>F</italic><sub>1</sub>-score=0.991), in the prediction of whether diagnostic keywords occur in speculative statements unrelated to abnormal findings, and in the improvement of the performance of keyword extraction (accuracy=0.996; <italic>F</italic><sub>1</sub>-score=0.997).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The ALBERT deep learning method showed the best performance. Our results represent a significant advancement in the clinical applications of computer-aided notification systems.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>radiology report</kwd>
        <kwd>natural language processing</kwd>
        <kwd>negation</kwd>
        <kwd>deep learning</kwd>
        <kwd>transfer learning</kwd>
        <kwd>supervised learning</kwd>
        <kwd>validation study</kwd>
        <kwd>Bidirectional Encoder Representations from Transformers</kwd>
        <kwd>BERT</kwd>
        <kwd>clinical application</kwd>
        <kwd>radiology</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Timely and effective communication of test results is essential in modern medicine. To promptly address patients’ problems, hospitals must ensure that the test results are completed without delay and that clinicians are aware of substantial abnormal findings. Delayed or failed communication of important findings by the department performing the test and the clinical team can increase the risk of adverse patient events and result in medical malpractice and compensation, especially for potentially life-threatening and important diagnoses [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
        <p>Although radiology reports are the primary method of communication between radiology and clinical departments, the fact that a radiologist produces a report does not necessarily mean that the clinician reads it entirely. Ignácio et al [<xref ref-type="bibr" rid="ref2">2</xref>] showed that only 55.7% of clinicians read the entire report thoroughly. Reda et al [<xref ref-type="bibr" rid="ref3">3</xref>] showed that &#62;40% of clinicians read only the conclusions or only read the conclusions in detail. More than 30% of clinicians have made preventable medical errors because they did not read radiology reports carefully. Even if the radiologist has made the correct diagnosis in the report, the clinician may still miss it.</p>
        <p>To address these communication issues, current radiology guidelines [<xref ref-type="bibr" rid="ref4">4</xref>] now require radiologists to go beyond report completion and use additional communication methods for reports with significant findings, including flagging or alerting the report, e-mailing, or direct verbal communication via telephone. Natural language processing can also automatically extract data from radiology reports, for example, automatically extracting important diagnoses, follow-up data, or management recommendations or automatically identifying reports that require specific action [<xref ref-type="bibr" rid="ref5">5</xref>]. These methods can help to identify important information in radiology reports or reports that need to be read in detail to alert clinicians.</p>
        <p>In addition, the laboratory information system (LIS) used in hospitals today can automatically highlight abnormalities found in tests and display them differently to ensure that clinicians do not miss important findings, such as using different colors or special symbols [<xref ref-type="bibr" rid="ref6">6</xref>]. For example, in our hospital, if a patient has undergone a routine blood test and some of the blood cell counts are abnormal, the LIS will automatically display the results on the computer screen in a unique color for the abnormal values and a typical color for the others. The LIS also displays important keywords (eg, nodules) within radiology reports in different colors.</p>
        <p>However, because most radiology reports are freely typed by radiologists in an unstructured manner, both techniques encounter challenges. Negative and speculative statements are significant problems.</p>
        <p>Radiologists can use negative statements to communicate the absence of specific diagnoses and provide a clearer picture of the patient’s condition. For example, the statement “No definite CT evidence of aortic dissection” informs the clinician that the patient’s condition is not related to aortic dissection.</p>
        <p>The diagnoses in the speculative statements may or may not be related to the actual abnormal findings. The radiology report may contain speculative statements in the presence of an imaging finding of uncertain significance that requires further investigation, for example, “RUL lung nodule. Lung cancer should be suspected.” In such cases, the diagnoses (lung cancer) in the speculative statements are related to abnormal findings. Even if the radiologist finds no problems with the study, the radiology report may still contain speculative statements to prevent potential medicolegal issues. Disclaimers (eg, “10%-15% of cases of breast cancer are missed on mammograms” [<xref ref-type="bibr" rid="ref7">7</xref>]) and statements of limitations (eg, “non-enhanced images, small lesion may be obscured”) are common examples. In such cases, the diagnoses (breast cancer or lesion) in the speculative statements are unrelated to the actual diagnoses.</p>
        <p>A notification system that does not distinguish whether diagnostic information is contained in negative or speculative statements unrelated to abnormal findings and annotates or extracts all of them to “alert” the clinician may generate excessive false alarms. Excessive false alarms can overload the clinician’s senses and lead to the “cry wolf” phenomenon, causing alarm fatigue. Consequently, clinicians may delay detection or even ignore truly valuable alerts, posing a risk to patients, especially if the percentage of false alarms is high [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>This study aimed to address the potential analytical inaccuracies resulting from negative and speculative statements in radiology reports and to facilitate the use of unstructured reports by hospital information systems.</p>
      </sec>
      <sec>
        <title>Prior Work</title>
        <p>Current studies have adopted various approaches to detect negation and speculation, including rule-based, machine learning–based, and deep learning–based approaches [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>The rule-based approach relies on experts to define the rules that are understandable to humans. NegEx, proposed by Chapman et al [<xref ref-type="bibr" rid="ref18">18</xref>]; NegFinder, proposed by Mutalik et al [<xref ref-type="bibr" rid="ref19">19</xref>]; NegHunter, proposed by Gindl et al [<xref ref-type="bibr" rid="ref20">20</xref>]; and NegExpander, proposed by Aronow et al [<xref ref-type="bibr" rid="ref21">21</xref>], are regular expression-based approaches. Regular expression-based methods have limitations, such as the inability to capture the syntactic structure and the possibility of misinterpreting the scope of the negative and speculative statements. For example, “No change of tumor” may be misinterpreted as both “No change” and “No tumor.”</p>
        <p>Methods such as DEEPEN (Dependency Parser Negation), proposed by Mehrabi et al [<xref ref-type="bibr" rid="ref22">22</xref>], and NegBio, proposed by Peng et al [<xref ref-type="bibr" rid="ref23">23</xref>], analyze the syntactic structure based on grammar. These methods are more accurate than regular expression-based approaches in limiting the scope of negative and speculative statements and reducing false positives because these methods consider the dependency relationship between words. However, these methods have certain limitations. For example, errors in the analysis may occur if the grammar of the text deviates from typical norms, such as the presence of long noun phrases [<xref ref-type="bibr" rid="ref23">23</xref>]. When analyzing text, most of these methods [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>] split the text into sentences that are analyzed independently. The algorithms and expert-defined rules only consider a single sentence at once and do not consider both the preceding and following contexts.</p>
        <p>With the advancement of artificial intelligence, machine learning techniques have been applied to detect negation and speculation. For example, Medlock et al [<xref ref-type="bibr" rid="ref24">24</xref>] proposed a weakly supervised learning–based approach to predict the labels of training samples for machine learning training and used the trained models to detect speculation in biomedical texts. Rokach et al [<xref ref-type="bibr" rid="ref25">25</xref>] compared several machine learning approaches, including the Hidden Markov Model, Conditional Random Field (CRF), decision tree, and AdaBoost, cascaded decision tree classifiers with and without the Longest Common Sequence. They found that the cascaded decision tree with the Longest Common Sequence performed best. Morante et al proposed k-nearest neighbor algorithm–based [<xref ref-type="bibr" rid="ref26">26</xref>] and meta-learning–based approaches [<xref ref-type="bibr" rid="ref27">27</xref>]. Ou et al [<xref ref-type="bibr" rid="ref28">28</xref>] compared rule-based and support vector machine–based machine learning methods and obtained better performance of machine learning methods.</p>
        <p>Later studies began investigating deep learning–based approaches and achieved better results than previous non–deep learning approaches. Qian et al [<xref ref-type="bibr" rid="ref17">17</xref>] were the first to propose a deep learning method for negation and speculation detection using a convolutional neural network–based model by using the relative position of tokens and path features from syntactic trees as features.</p>
        <p>By contrast, recurrent neural networks and their derivatives, such as Long Short-Term Memory (LSTM), are suitable for processing sequential data. These architectures can incorporate dependencies on preceding and following elements, making them particularly useful for natural language processing tasks, and have achieved good results in recognizing negations and speculations. For example, in a study by Fancellu et al [<xref ref-type="bibr" rid="ref14">14</xref>], a Bidirectional LSTM (BiLSTM)–based model was applied, and it demonstrated better performance than other methods on the Sherlock data set. Lazib et al [<xref ref-type="bibr" rid="ref9">9</xref>] compared methods, including LSTM, BiLSTM, Gated Recurrent Unit, and CRF, and showed that the recurrent neural network–based architecture performed the best. Gautam et al [<xref ref-type="bibr" rid="ref15">15</xref>] compared several LSTM-based models and obtained the best performance using 2-layer encoders and decoders with dropouts. Taylor et al [<xref ref-type="bibr" rid="ref10">10</xref>] applied the BiLSTM-based model to the analysis of negation in electroencephalography reports. Sergeeva et al [<xref ref-type="bibr" rid="ref11">11</xref>] proposed an LSTM-based approach and investigated the effect of expert-provided negation cues on the detection performance of the negation scopes. Sykes et al [<xref ref-type="bibr" rid="ref12">12</xref>] compared the methods based on BiLSTM and feedforward neural networks and rule-based methods, including pyConText, NegBio, and EdIE-R, for negation detection in radiology reports. The BiLSTM-based approach outperformed other approaches.</p>
        <p>BERT (Bidirectional Encoder Representations from Transformers) [<xref ref-type="bibr" rid="ref29">29</xref>], proposed by Google in 2018, is a pretrained, transformer-based model that is effective for negation detection. Khandelwal et al [<xref ref-type="bibr" rid="ref16">16</xref>] developed NegBERT and, in another study [<xref ref-type="bibr" rid="ref13">13</xref>], used a multitasking approach with BERT, XLNet, and RoBERTa (Robustly Optimized BERT Pretraining Approach) for negation and speculation detection, with improved results on BioScope and Simon Fraser University review data sets compared with the control methods. Zavala et al [<xref ref-type="bibr" rid="ref30">30</xref>] proposed a system based on BiLSTM with CRF and fine-tuned BERT; evaluated the methods on English and Spanish clinical, biomedical, and review text; and showed improved performance compared with previous methods. They also found that pretrained word embedding, especially contextualized embedding, helped to understand the biomedical text.</p>
        <p>Numerous variants of BERT have been developed to improve performance and simplify the model. ALBERT (A Lite BERT) [<xref ref-type="bibr" rid="ref31">31</xref>] reduces the model parameters and improves the performance through parameter sharing and matrix decomposition. DistilBERT (Distilled version of BERT) [<xref ref-type="bibr" rid="ref32">32</xref>] uses knowledge distillation to reduce the size and improve the inference speed while retaining most of the language understanding. XLNet [<xref ref-type="bibr" rid="ref33">33</xref>] implements autoregressive training while preserving the advantages of autoencoding models and outperforms BERT on 20 tasks. RoBERTa [<xref ref-type="bibr" rid="ref34">34</xref>] improves the training method to outperform BERT and XLNet. ERNIE (Enhanced Representation through Knowledge Integration) [<xref ref-type="bibr" rid="ref35">35</xref>] uses an alternative masking method to outperform BERT in Chinese tasks. SpanBERT [<xref ref-type="bibr" rid="ref36">36</xref>] extends BERT with span-based masking and an additional training objective, resulting in a better performance on span-based tasks. DeBERTa (Decoding-Enhanced BERT With Disentangled Attention) [<xref ref-type="bibr" rid="ref37">37</xref>] improves BERT and RoBERTa with decoupled attention, improved mask encoder, and virtual adversarial training and outperforms RoBERTa-Large on the Multigenre Natural Language Inference, Stanford Question Answering Data set, and Reading Comprehension data set from examinations tasks and humans on the SuperGLUE task. ELECTRA (Efficiently Learning an Encoder That Classifies Token Replacements Accurately) [<xref ref-type="bibr" rid="ref38">38</xref>] outperforms BERT with a new pretraining task, Replaced Token Detection, and performs similarly to RoBERTa and XLNet with one-fourth the computation.</p>
      </sec>
      <sec>
        <title>Contribution of This Work</title>
        <p>This study has implications for optimizing the performance of hospital information systems in managing unstructured electronic medical records. The key findings and results of this study are as follows.</p>
        <p>First, we found that fine-tuned general-purpose transformer models could outperform NegEx, NegBio, and NegBERT, which are explicitly designed for negation and speculation detection. We identified sources of error in the latter 3 methods and suggested potential improvements.</p>
        <p>Second, we found that transformer, unlike NegEx and NegBio, demonstrated the ability to perform multisentence contextual analysis and further granular classification of speculative statements as related or unrelated to abnormal findings. This capability can improve information filtering in hospital information systems to eliminate nondiagnostically relevant information.</p>
        <p>Finally, in contrast to other studies using BERT [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], we found that using a lightweight transformer model and learning the cues and scopes of negative and speculative sentences in a single step can perform well.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethics Approval</title>
        <p>The Chi Mei Hospital Institutional Review Board reviewed and approved this study (11105-J02). This study is a retrospective analysis study using deidentified electronic medical records, thus obviating the requirement for obtaining informed consent from the individuals. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the flow diagram of the study.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Research flow. n: number of reports.</p>
          </caption>
          <graphic xlink:href="medinform_v11i1e46348_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Inclusion and Exclusion Criteria</title>
        <p>The inclusion criteria for this study were radiological examinations performed in the 3 branches of our institution between 2012 and 2022, with the reports being written in English language and the type of examination being x-ray, special radiology, computed tomography (CT), magnetic resonance imaging (MRI), or ultrasound. We included cases that met all criteria. The exclusion criteria were Chinese reports and patients aged &#60;20 years at the time of examination. We excluded cases that met any of the exclusion criteria. Samples were collected using 2 independent keyword searches in a search engine targeting radiology reports that met the inclusion criteria but not the exclusion criteria.</p>
      </sec>
      <sec>
        <title>Data</title>
        <sec>
          <title>Overview</title>
          <p>The training and development data set consisted of 5000 radiology reports randomly selected from a keyword search using the terms “fracture,” “dissection,” “infarct,” “pneumothorax,” “extravasation,” “thrombosis,” or “pneumoperitoneum.” The test data set consisted of 1000 reports selected from a keyword search using the terms “tumor,” “consolidation,” “pulmonary TB,” “metastasis,” or “bleeding.” Keywords were selected from our institution’s list of important keywords and randomly assigned to the data sets. These keywords are referred to as “important keywords” in the study. The samples in the training and development and test data sets were mutually exclusive with no overlap.</p>
          <p>The training and development data set was automatically partitioned into training and development data sets in a 9:1 ratio for model training. The ratio of the training, development, and test data sets was 9:1:2, with 4500, 500, and 1000 radiology reports, respectively.</p>
          <p>In this study, each word or token was assigned to one of the 2 categories, as shown in <xref ref-type="table" rid="table1">Table 1</xref>: “Positive statements, or speculative statements potentially related to abnormal findings” (category 0) and “negative statements, or speculative statements not related to abnormal findings” (category 1). We combined speculative statements unrelated to abnormal findings with negative statements as a single class because of their limited representation. The rationale for category 1 is that the information conveyed is not relevant to abnormal findings and should not trigger highlights or alerts. A token is the minimum output unit of the transformer-based model’s tokenizer.</p>
          <p>All radiology reports included in the study were deidentified by removing identifying information such as medical record number, application number, examination date, ordering department, and examination time. A radiologist with 12 years of experience (KHW) reviewed the reports and annotated all negative and speculative statements unrelated to abnormal findings using the open-source Doccano [<xref ref-type="bibr" rid="ref40">40</xref>] software. The annotation served as the gold standard for subsequent analysis.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Classification of words and tokens in this study.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="270"/>
              <col width="0"/>
              <col width="610"/>
              <col width="0"/>
              <col width="90"/>
              <thead>
                <tr valign="bottom">
                  <td colspan="3">Type<sup>a</sup> and subtype</td>
                  <td colspan="2">Example</td>
                  <td>Category<sup>b</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="3">Negative</td>
                  <td colspan="2">Liver laceration at S6 <italic>without active contrast extravasation</italic></td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td colspan="6">
                    <bold>Speculative</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Unrelated to abnormal findings</td>
                  <td colspan="2">No CT<sup>c</sup> evidence of large infarct. <italic>Suggest MRI<sup>d</sup> to exclude hyperacute infarct if indicated</italic></td>
                  <td colspan="2">1</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Potentially related to abnormal findings</td>
                  <td colspan="2">
                    <italic>Rt<sup>e</sup> cerebellum acute infarct cannot be ruled out.</italic>
                  </td>
                  <td colspan="2">0</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">Positive</td>
                  <td colspan="2">
                    <italic>Rt cerebellum acute infarct</italic>
                  </td>
                  <td>0</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>Type refers to the type of statement.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>Token category in the italicized text if italicization is used. All texts without italics were classified as category 0. Category 0: positive statements or speculative statements potentially related to abnormal findings. Category 1: negative statements or speculative statements not related to abnormal findings.</p>
              </fn>
              <fn id="table1fn3">
                <p><sup>c</sup>CT: computed tomography.</p>
              </fn>
              <fn id="table1fn4">
                <p><sup>d</sup>MRI: magnetic resonance imaging.</p>
              </fn>
              <fn id="table1fn5">
                <p><sup>e</sup>Rt: right.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Included Negations</title>
          <p>This study included all statements in which the radiologist explicitly denied a diagnosis or a finding. Our data included morphological negation and sentential negation, which are common forms of negative statements in English text [<xref ref-type="bibr" rid="ref22">22</xref>]. Morphological negation involves using prefixes, such as “un-” or “ir-,” to modify certain words to express negation. Sentential negation involves using negative words, such as “no” or “without,” to negate part of the statement. In addition, radiologists at the authors’ hospital often use unique symbols or abbreviations, such as “(−)” or “[−].”</p>
        </sec>
        <sec>
          <title>Included Speculations</title>
          <p>In cases where the imaging study is inconclusive but there is still the possibility of a significant abnormality, the information system should notify the clinician and allow the clinician to make the final decision. Therefore, for the task of speculation detection, our focus was limited to speculative statements that were unrelated to abnormal findings. Meanwhile, we treated speculative statements that may correlate with actual abnormal findings as equivalent to positive statements.</p>
          <p>After reviewing the samples, we identified 2 scenarios in which speculative statements could be confidently determined to be unrelated to abnormal findings. First, the radiologist explicitly stated that there was no relevant abnormality. Second, the radiologist stated that certain diagnoses could not be evaluated owing to study limitations. In all the other scenarios, speculative statements may be associated with abnormal findings.</p>
          <p>In the following 3 examples, we classify the diagnoses or findings written in italics as speculative statements unrelated to abnormal findings. The actual test results were normal or unrelated to these diagnoses or findings.</p>
          <list list-type="order">
            <list-item>
              <p>No CT evidence of pulmonary embolism. Suggest V/Q scan to exclude <italic>small branch embolism</italic> if indicated.</p>
            </list-item>
            <list-item>
              <p>No CT evidence of large infarct. Suggest MRI to exclude <italic>hyperacute infarct</italic> if indicated.</p>
            </list-item>
            <list-item>
              <p><italic>Liver tumor</italic> cannot be excluded by noncontrast CT.</p>
            </list-item>
          </list>
          <p>In the following 2 examples, the diagnoses or findings written in italics are speculative statements considered potentially related to actual abnormal findings:</p>
          <list list-type="order">
            <list-item>
              <p>Equivocal filling defect in RLL segmental pulmonary artery. Suggest V/Q scan to exclude <italic>small branch embolism</italic> if indicated.</p>
            </list-item>
            <list-item>
              <p>Rt cerebellum <italic>acute infarct</italic> cannot be ruled out.</p>
            </list-item>
          </list>
        </sec>
      </sec>
      <sec>
        <title>Design of the Experiments</title>
        <p>We conducted 2 experiments to evaluate the ability of general all-purpose pretrained deep learning models and existing negation and speculation-detection algorithms to identify negation and speculation in real-world radiology reports.</p>
        <p>In experiment 1 (<xref rid="figure2" ref-type="fig">Figure 2</xref>), we fine-tuned several transformer-based models using our training and validation data sets. We performed token category prediction (category 0 or 1) for all tokens in the training, validation, and test data sets.</p>
        <p>In experiment 2 (<xref rid="figure3" ref-type="fig">Figure 3</xref>), we compared 3 negation and speculation-detection algorithms that performed well on public data sets with the best model from experiment 1. The algorithms evaluated were NegEx and NegBio, which have predefined expert rules and open-source implementations, and NegBERT, whose training code is available. We then performed category prediction (category 0 or 1) for all words that matched a given “important keyword” in the test data set. We also analyzed the sources of errors. In addition, we compared the performance of keyword extraction in positive and speculative statements potentially related to abnormal findings before and after applying various algorithms.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Experiment 1. X: the original text, ŷ: class predicted by the model; y: the gold standard. Category 0: positive statements or speculative statements potentially related to abnormal findings; category 1: negative statements or speculative statements unrelated to abnormal findings. ALBERT: A Lite Bidirectional Encoder Representations From Transformers; BERT: Bidirectional Encoder Representations From Transformers; DeBERTa: Decoding-Enhanced Bidirectional Encoder Representations From Transformers With Disentangled Attention; DistilBERT: Distilled version of Bidirectional Encoder Representations From Transformers; ELECTRA: Efficiently Learning an Encoder That Classifies Token Replacements Accurately; ERNIE: Enhanced Representation through Knowledge Integration; RoBERTa: Robustly Optimized Bidirectional Encoder Representations From Transformers Pretraining Approach; RUL: right upper lobe.</p>
          </caption>
          <graphic xlink:href="medinform_v11i1e46348_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Experiment 2. X: the original text; ŷ: class predicted by the model; y: the gold standard; category 0: positive statements or speculative statements potentially related to abnormal findings; category 1: negative statements or speculative statements unrelated to abnormal findings; bold text: word matching a designated “important keyword.” Exp: experiment.</p>
          </caption>
          <graphic xlink:href="medinform_v11i1e46348_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Modeling in Experiments</title>
        <p>The deep learning models used in experiment 1 were ALBERT, BERT, DeBERTa, DistilBERT, ELECTRA, ERNIE, RoBERTa, SpanBERT, and XLNet. All models were fine-tuned based on the pretrained models from Hugging Face.</p>
        <p>We used early stopping and used the <italic>F</italic><sub>1</sub>-score as the model evaluation metric. We used the Adam optimizer with a batch size of 16 and weight decay of 0.01. <xref ref-type="table" rid="table2">Table 2</xref> lists the parameters of other models. We set all other unspecified parameters to the default values provided by the open-source PyTorch framework. We segmented the texts into blocks of no more than 510 characters before entering the model to avoid truncation.</p>
        <p>We adopted a sequence-to-sequence approach for the training. The training program input the report text in the training and development data set into the model using the corresponding tokenizer and trained the model. The models predicted the token categories using the radiologist-annotated data as the gold standard. The test data set was not included in the training process.</p>
        <p>For the NegEx algorithm, we used the negspaCy pipeline component of the open-source Spacy software [<xref ref-type="bibr" rid="ref41">41</xref>]. The specific named entity recognition model used was “en_ner_bc5cdr_md.” In addition, we extended the recognizable entities in Spacy to include all the important keywords defined in our experiment.</p>
        <p>We used the previously published training parameters of NegBERT, including a batch size of 8, maximum training epochs of 60, an initial learning rate of 3 × 10<sup>−5</sup>, and an early stopping patience of 6. We applied NegBERT for cue detection using the model “bert-base-uncased” and scope detection using the model “xlnet-base-cased.” Furthermore, we validated that the trained NegBERT showed a comparable level of performance to that reported in the original publication on the data set specified in the original study.</p>
        <p>In addition to the configuration mentioned earlier, we made only minimal modifications to NegBio and NegBERT, such as specifying the dependent software versions, adding the necessary files to the installation, and configuring file paths to ensure the proper execution of the software.</p>
        <p>In experiment 2, we optimized the performance of the NegEx, NegBio, and NegBERT methods. This optimization was achieved by modifying the expert-defined rules of NegEx and NegBio and using our training and development data set, as well as the negation and speculation cues we identified, to train NegBERT without using the data set from the original study.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Deep learning model and training parameters used in this study.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="90"/>
            <thead>
              <tr valign="bottom">
                <td>Model</td>
                <td>Learning rate</td>
                <td>Warm-up steps</td>
                <td>Adam beta1</td>
                <td>Adam beta2</td>
                <td>Adam epsilon</td>
                <td>FP16<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ALBERT<sup>b</sup></td>
                <td>1 × 10<sup>−5</sup></td>
                <td>10,000</td>
                <td>0.9</td>
                <td>0.999</td>
                <td>1 × 10<sup>−8</sup></td>
                <td>False</td>
              </tr>
              <tr valign="top">
                <td>BERT<sup>c</sup></td>
                <td>1 × 10<sup>−4</sup></td>
                <td>10,000</td>
                <td>0.9</td>
                <td>0.999</td>
                <td>1 × 10<sup>−8</sup></td>
                <td>False</td>
              </tr>
              <tr valign="top">
                <td>DeBERTa<sup>d</sup></td>
                <td>1 × 10<sup>−4</sup></td>
                <td>10,000</td>
                <td>0.9</td>
                <td>0.999</td>
                <td>1 × 10<sup>−6</sup></td>
                <td>True</td>
              </tr>
              <tr valign="top">
                <td>DistilBERT<sup>e</sup></td>
                <td>2 × 10<sup>−5</sup></td>
                <td>0</td>
                <td>0.9</td>
                <td>0.999</td>
                <td>1 × 10<sup>−8</sup></td>
                <td>False</td>
              </tr>
              <tr valign="top">
                <td>ELECTRA<sup>f</sup></td>
                <td>1 × 10<sup>−4</sup></td>
                <td>10,000</td>
                <td>0.9</td>
                <td>0.999</td>
                <td>1 × 10<sup>−6</sup></td>
                <td>False</td>
              </tr>
              <tr valign="top">
                <td>ERNIE<sup>g</sup></td>
                <td>5 × 10<sup>−5</sup></td>
                <td>4000</td>
                <td>0.9</td>
                <td>0.98</td>
                <td>1 × 10<sup>−8</sup></td>
                <td>False</td>
              </tr>
              <tr valign="top">
                <td>RoBERTa<sup>h</sup></td>
                <td>1 × 10<sup>−4</sup></td>
                <td>10,000</td>
                <td>0.9</td>
                <td>0.999</td>
                <td>1 × 10<sup>−8</sup></td>
                <td>False</td>
              </tr>
              <tr valign="top">
                <td>SpanBERT</td>
                <td>5 × 10<sup>−5</sup></td>
                <td>10,000</td>
                <td>0.9</td>
                <td>0.999</td>
                <td>1 × 10<sup>−8</sup></td>
                <td>False</td>
              </tr>
              <tr valign="top">
                <td>XLNet</td>
                <td>2 × 10<sup>−5</sup></td>
                <td>10,000</td>
                <td>0.9</td>
                <td>0.999</td>
                <td>1 × 10<sup>−6</sup></td>
                <td>False</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>FP16: half-precision floating-point format.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>ALBERT: A Lite Bidirectional Encoder Representations From Transformers.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>BERT: Bidirectional Encoder Representations From Transformers.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>DeBERTa: Decoding-Enhanced Bidirectional Encoder Representations From Transformers With Disentangled Attention.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>DistilBERT: Distilled version of Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>ELECTRA: Efficiently Learning an Encoder That Classifies Token Replacements Accurately.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>ERNIE: Enhanced Representation through Knowledge Integration.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>RoBERTa: Robustly Optimized Bidirectional Encoder Representations From Transformers Pretraining Approach.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Demographics</title>
        <p>The data set included in this study consisted of 6000 radiology reports, including plain radiography reports (2538/6000, 42.3%), CT reports (2163/6000, 36.05%), MRI reports (668/6000, 11.13%), ultrasound reports (483/6000, 8.05%), angiography reports (97/6000, 1.62%), and reports from other types of studies (51/6000, 0.85%). The report was completed by 78 radiology residents and their attending physicians. The training, validation, and test data sets were mutually exclusive with no overlap in the samples.</p>
        <p>The data set used in this study consisted of 78,901 sentences and 704,512 words. A total of 15.01% (105,755/704,512) of all the words in the data set were identified as negative and speculative statements unrelated to abnormal findings. <xref ref-type="table" rid="table3">Table 3</xref> presents examples and frequencies of these statements. In this study, we defined a “word” as a contiguous sequence of one or more non–white space characters of maximum length. For example, “(−) metastasis” contains 2 words.</p>
        <p>Of all the 16,374 cases of sentential negations identified, 15,568 (95.1%) used “no,” “without,” “not,” or “none” as the first word of the negative statement. Furthermore, of all the 2763 cases of negation using symbols or abbreviations, we observed that 2411 (87.2%) used (−), (_), ( ), or [−] at the beginning, end, or middle of the negated clause.</p>
        <p><xref ref-type="table" rid="table4">Table 4</xref> presents the frequency and number of occurrences of important keywords, as defined in this study, within negative or speculative statements unrelated to abnormal findings and the total number of occurrences in the study.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Types and numbers of negative and speculative sentences unrelated to abnormal findings included in this study (N=19,467).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="530"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td>Type</td>
                <td>Example</td>
                <td>Findings, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Sentential negation</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>No evidence of aortic dissection</p>
                    </list-item>
                  </list>
                </td>
                <td>16,374 (84.11)</td>
              </tr>
              <tr valign="top">
                <td>Symbols or abbreviations</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Metastasis (−)</p>
                    </list-item>
                    <list-item>
                      <p>Thrombosis: No</p>
                    </list-item>
                    <list-item>
                      <p>DM<sup>a</sup>- HTN<sup>b</sup>-</p>
                    </list-item>
                    <list-item>
                      <p>Anti-HCV<sup>c</sup> [Negative]</p>
                    </list-item>
                    <list-item>
                      <p>- lung - bone</p>
                    </list-item>
                  </list>
                </td>
                <td>2762 (14.19)</td>
              </tr>
              <tr valign="top">
                <td>Speculative statements not related to abnormal findings</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>No CT<sup>d</sup> evidence of pulmonary embolism. Suggest V/Q<sup>e</sup> scan to exclude small branch embolism if indicated</p>
                    </list-item>
                    <list-item>
                      <p>Metallic artifacts, lesion may be obscured</p>
                    </list-item>
                  </list>
                </td>
                <td>196 (1.01)</td>
              </tr>
              <tr valign="top">
                <td>Morphological negation</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>This coronary CT scan is nondiagnostic.</p>
                    </list-item>
                  </list>
                </td>
                <td>135 (0.69)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>DM: diabetes mellitus.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>HTN: hypertension.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>HCV: hepatitis C virus.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>CT: computed tomography.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>V/Q: ventilation and perfusion.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Occurrence and frequency of important keywords defined in this study within negative or speculative statements unrelated to abnormal findings.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="680"/>
            <col width="320"/>
            <thead>
              <tr valign="bottom">
                <td>Keywords and their overall occurrences (n=11,480)</td>
                <td>Occurrences (N+S)<sup>a</sup>, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Pneumothorax, n=1288 (11.22%)</td>
                <td>976 (75.78)</td>
              </tr>
              <tr valign="top">
                <td>Extravasation, n=182 (1.58%)</td>
                <td>84 (46.2)</td>
              </tr>
              <tr valign="top">
                <td>Fracture, n=2161 (18.82%)</td>
                <td>992 (45.90)</td>
              </tr>
              <tr valign="top">
                <td>Tumor, n=2698 (23.5%)</td>
                <td>1025 (37.99)</td>
              </tr>
              <tr valign="top">
                <td>Infarct, n=1364 (11.88%)</td>
                <td>514 (37.68)</td>
              </tr>
              <tr valign="top">
                <td>Consolidation, n=428 (3.73%)</td>
                <td>152 (35.5)</td>
              </tr>
              <tr valign="top">
                <td>Pneumoperitoneum, n=63 (0.55%)</td>
                <td>19 (30)</td>
              </tr>
              <tr valign="top">
                <td>Thrombosis, n=614 (5.35%)</td>
                <td>143 (23.3)</td>
              </tr>
              <tr valign="top">
                <td>Dissection, n=673 (5.86%)</td>
                <td>147 (21.8)</td>
              </tr>
              <tr valign="top">
                <td>Metastasis, n=1876 (16.34%)</td>
                <td>450 (23.98)</td>
              </tr>
              <tr valign="top">
                <td>Bleeding, n=118 (1.03%)</td>
                <td>27 (22.9)</td>
              </tr>
              <tr valign="top">
                <td>Pulmonary TB<sup>b</sup>, n=15 (0.13%)</td>
                <td>0 (0)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Number of occurrences within negative or speculative statements unrelated to abnormal findings.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>TB: tuberculosis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Result of Experiment 1</title>
        <p><xref ref-type="table" rid="table5">Table 5</xref> presents the results of experiment 1. The accuracy of all transformer-based models included in this experiment was greater than 0.98 for the training, validation, and test data sets, with macro <italic>F</italic><sub>1</sub>-scores &#62;0.90. The best-performing model, ALBERT, was selected for inclusion in experiment 2.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Comparison of deep learning prediction performance.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="120"/>
            <col width="90"/>
            <col width="90"/>
            <col width="120"/>
            <col width="0"/>
            <col width="130"/>
            <col width="90"/>
            <col width="90"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">Train and validation data set</td>
                <td colspan="4">Test data set</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td>Recall</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>Accuracy</td>
                <td colspan="2">Precision</td>
                <td>Recall</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>Accuracy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ALBERT<sup>a</sup></td>
                <td>0.992</td>
                <td>0.990</td>
                <td>0.992</td>
                <td>0.998</td>
                <td colspan="2">
                  <italic>0.973</italic>
                  <sup>b</sup>
                </td>
                <td>
                  <italic>0.943</italic>
                  <sup>b</sup>
                </td>
                <td>
                  <italic>0.958</italic>
                  <sup>b</sup>
                </td>
                <td>
                  <italic>0.991</italic>
                  <sup>b</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>BERT<sup>c</sup></td>
                <td>0.980</td>
                <td>0.986</td>
                <td>0.983</td>
                <td>0.995</td>
                <td colspan="2">0.960</td>
                <td>0.930</td>
                <td>0.945</td>
                <td>0.989</td>
              </tr>
              <tr valign="top">
                <td>DeBERTa<sup>d</sup></td>
                <td>0.989</td>
                <td>0.971</td>
                <td>0.975</td>
                <td>0.993</td>
                <td colspan="2">0.958</td>
                <td>0.859</td>
                <td>0.906</td>
                <td>0.980</td>
              </tr>
              <tr valign="top">
                <td>DistilBERT<sup>e</sup></td>
                <td>0.994</td>
                <td>0.990</td>
                <td>0.992</td>
                <td>0.998</td>
                <td colspan="2">0.980</td>
                <td>0.912</td>
                <td>0.945</td>
                <td>0.988</td>
              </tr>
              <tr valign="top">
                <td>ELECTRA<sup>f</sup></td>
                <td>0.982</td>
                <td>0.982</td>
                <td>0.982</td>
                <td>0.995</td>
                <td colspan="2">0.956</td>
                <td>0.943</td>
                <td>0.950</td>
                <td>0.989</td>
              </tr>
              <tr valign="top">
                <td>ERNIE<sup>g</sup></td>
                <td>0.987</td>
                <td>0.984</td>
                <td>0.986</td>
                <td>0.996</td>
                <td colspan="2">0.963</td>
                <td>0.920</td>
                <td>0.941</td>
                <td>0.988</td>
              </tr>
              <tr valign="top">
                <td>RoBERTa<sup>h</sup></td>
                <td>0.959</td>
                <td>0.979</td>
                <td>0.969</td>
                <td>0.991</td>
                <td colspan="2">0.890</td>
                <td>0.933</td>
                <td>0.911</td>
                <td>0.980</td>
              </tr>
              <tr valign="top">
                <td>SpanBERT</td>
                <td>0.992</td>
                <td>0.992</td>
                <td>0.992</td>
                <td>0.998</td>
                <td colspan="2">0.958</td>
                <td>0.932</td>
                <td>0.945</td>
                <td>0.988</td>
              </tr>
              <tr valign="top">
                <td>XLNet</td>
                <td>0.993</td>
                <td>0.993</td>
                <td>0.993</td>
                <td>0.998</td>
                <td colspan="2">0.970</td>
                <td>0.943</td>
                <td>0.957</td>
                <td>0.990</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>ALBERT: A Lite Bidirectional Encoder Representations From Transformers.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>Italics highlight that the performance of A Lite Bidirectional Encoder Representations From Transformers is the best compared with the control methods across various performance metrics.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>BERT: Bidirectional Encoder Representations From Transformers.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>DeBERTa: Decoding-Enhanced Bidirectional Encoder Representations From Transformers With Disentangled Attention.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup>DistilBERT: Distilled version of Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup>ELECTRA: Efficiently Learning an Encoder That Classifies Token Replacements Accurately.</p>
            </fn>
            <fn id="table5fn7">
              <p><sup>g</sup>ERNIE: Enhanced Representation through Knowledge Integration.</p>
            </fn>
            <fn id="table5fn8">
              <p><sup>h</sup>RoBERTa: Robustly Optimized Bidirectional Encoder Representations From Transformers Pretraining Approach.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Result of Experiment 2</title>
        <p>Before optimization, the performance of NegBio and NegBERT was suboptimal. The <italic>F</italic><sub>1</sub>-scores for NegEx, NegBio, and NegBERT were 0.889, 0.587, and 0.393, respectively. Our optimization significantly improved the performance of NegBio and NegBERT by increasing their <italic>F</italic><sub>1</sub>-scores by 0.239 and 0.588, respectively.</p>
        <p><xref ref-type="table" rid="table6">Table 6</xref> shows the performance of ALBERT and optimized NegEx, NegBio, and NegBERT. The precision, recall, and <italic>F</italic><sub>1</sub>-score of our fine-tuned transformer-based model (ALBERT) were better than those of the optimized NegEx, NegBio, and NegBERT.</p>
        <p><xref ref-type="table" rid="table7">Table 7</xref> shows the performance evaluation of keyword extraction before and after applying the different negation and speculation-detection algorithms. The ALBERT method resulted in the most significant performance improvement in extracting keywords from positive and speculative statements potentially associated with abnormal findings.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Comparison of performance of A Lite Bidirectional Encoder Representations From Transformers (ALBERT) and optimized NegEx, NegBio, and NegBERT in the test data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="250"/>
            <col width="200"/>
            <col width="190"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td>Recall</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>Accuracy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ALBERT</td>
                <td>
                  <italic>0.991</italic>
                  <sup>a</sup>
                </td>
                <td>
                  <italic>0.992</italic>
                  <sup>a</sup>
                </td>
                <td>
                  <italic>0.991</italic>
                  <sup>a</sup>
                </td>
                <td>
                  <italic>0.996</italic>
                  <sup>a</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>NegEx</td>
                <td>0.886</td>
                <td>0.958</td>
                <td>0.921</td>
                <td>0.959</td>
              </tr>
              <tr valign="top">
                <td>NegBio</td>
                <td>0.860</td>
                <td>0.794</td>
                <td>0.826</td>
                <td>0.917</td>
              </tr>
              <tr valign="top">
                <td>NegBERT</td>
                <td>0.992</td>
                <td>0.970</td>
                <td>0.981</td>
                <td>0.991</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>Italics highlight that the performance of ALBERT is the best compared with the control methods (NegEx, NegBio, and NegBERT) across various performance metrics.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Comparison of the performance of keyword extraction in the test data set both before and after applying A Lite Bidirectional Encoder Representations From Transformers (ALBERT) and optimized NegEx, NegBio, and NegBERT.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="250"/>
            <col width="200"/>
            <col width="190"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td>Recall</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>Accuracy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ALBERT</td>
                <td>0.<italic>998</italic><sup>a</sup></td>
                <td>0.<italic>997</italic><sup>a</sup></td>
                <td>0.<italic>997</italic><sup>a</sup></td>
                <td>0.<italic>996</italic><sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>NegEx</td>
                <td>0.986</td>
                <td>0.959</td>
                <td>0.972</td>
                <td>0.959</td>
              </tr>
              <tr valign="top">
                <td>NegBio</td>
                <td>0.934</td>
                <td>0.958</td>
                <td>0.945</td>
                <td>0.917</td>
              </tr>
              <tr valign="top">
                <td>NegBERT</td>
                <td>0.99</td>
                <td>0.998</td>
                <td>0.994</td>
                <td>0.991</td>
              </tr>
              <tr valign="top">
                <td>Baseline<sup>b</sup></td>
                <td>0.752</td>
                <td>1.00</td>
                <td>0.859</td>
                <td>0.752</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>Italics highlight that the performance of ALBERT is the best compared with the control methods (NegEx, NegBio, and NegBERT) and the baseline (no negation or speculation detection was performed) across various performance metrics.</p>
            </fn>
            <fn id="table7fn2">
              <p><sup>b</sup>All named entities considered “positive.” No negation or speculation-detection algorithm was applied.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Sources of Errors</title>
        <sec>
          <title>Overview</title>
          <p>We analyzed the sources of the errors (<xref ref-type="table" rid="table8">Table 8</xref>). Despite changes in the rules defined by the experts, errors persisted in NegEx and NegBio. We identified the following causes:</p>
          <table-wrap position="float" id="table8">
            <label>Table 8</label>
            <caption>
              <p>Analysis of the causes of errors in different methods (after optimization).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="750"/>
              <col width="0"/>
              <col width="220"/>
              <thead>
                <tr valign="bottom">
                  <td colspan="3">Method and cause of the wrong prediction<sup>a</sup></td>
                  <td>Counts, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="4">
                    <bold>NegBio (n=177)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Errors in the extraction of named entities</td>
                  <td colspan="2">58 (32.8)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Symbol-related errors</td>
                  <td colspan="2">49 (27.7)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Tokenization error</td>
                  <td colspan="2">21 (11.9)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Errors in the prediction of speculative statements</td>
                  <td colspan="2">14 (7.9)</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold>NegEx (n=87)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>False-positive prediction related to speculative statements</td>
                  <td colspan="2">37 (42)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Trigger word not triggered</td>
                  <td colspan="2">21 (24)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Incorrect scope resolution</td>
                  <td colspan="2">16 (18)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Symbol-related errors</td>
                  <td colspan="2">6 (6)</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold>NegBERT (n=20)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>All false-negative predictions</td>
                  <td colspan="2">16 (80)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>All false-positive predictions</td>
                  <td colspan="2">4 (20)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>False-positive predictions related to speculative statements unrelated to abnormal findings</td>
                  <td colspan="2">0 (0)</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold>ALBERT<sup>b</sup> (n=9)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>All false-positive predictions</td>
                  <td colspan="2">5 (55)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>False-positive predictions related to speculative statements unrelated to abnormal findings</td>
                  <td colspan="2">0 (0)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table8fn1">
                <p><sup>a</sup>The table lists only the most important causes of identifiable errors.</p>
              </fn>
              <fn id="table8fn2">
                <p><sup>b</sup>ALBERT: A Lite Bidirectional Encoder Representations From Transformers.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Findings of NegEx</title>
          <p>First, we found many errors owing to incompatibility between the NegEx method for identifying speculative statements and the study requirements. NegEx made identical predictions for all keywords in the identified speculative statements regardless of their relevance to abnormal findings. However, our study categorized keywords in speculative sentences differently based on their relevance to abnormal findings, leading to discrepancies with NegEx’s results.</p>
          <p>Second, the trigger word would only sometimes trigger. For example, in the phrase “1.No evidence of tumor,” the trigger word “No” would not be recognized because it was concatenated with the character “1.” without any intervening space.</p>
          <p>Third, errors also occurred owing to the misinterpretation of the scope of negation and speculation, such as misinterpreting “No improvement of the tumor” as “No tumor.”</p>
          <p>Fourth, errors occurred in the presence of symbols in radiology reports; for example, the use of special symbols by radiologists that are undefined in the trigger word or the confusion caused by the co-occurrence of special symbols that express a positive and a negative statement: (−) fatty liver and (+) portal vein thrombosis.</p>
        </sec>
        <sec>
          <title>Findings of NegBio</title>
          <p>We identified the following errors when using NegBio:</p>
          <p>First, errors occurred in named entity extraction. The named entities in NegBio’s output file might be missing target keywords or had incorrect positions, resulting in incorrect future analyses.</p>
          <p>Second, errors occurred when the radiology report contained negations using symbols or abbreviations, such as “metastasis (−).” Our analysis showed that these symbols could lead to unpredictable results in syntactic structure analysis and subsequent analyses.</p>
          <p>Third, combining words with numerals or punctuation marks leads to errors in tokenization and subsequent analysis. For example, “1.No” in “1.No obvious acute infarct or brain metastasis” was not correctly parsed as “No.”</p>
          <p>Fourth, many errors occurred because NegBio made identical predictions for diagnostic keywords in all speculative sentences, regardless of their relevance to abnormal findings. This behavior was inconsistent with the labeling of this experiment.</p>
        </sec>
        <sec>
          <title>Findings of NegBERT and ALBERT</title>
          <p>We observed the suboptimal performance of NegBERT when applied to corpora from different domains and tasks. The performance of NegBERT trained on the Simon Fraser University review corpus was suboptimal when evaluated on our corpus and task. Retraining NegBERT with our data significantly improved its performance, indicating that the poor performance was primarily due to differences in the training data and labeling.</p>
          <p>Our error analysis showed that retrained NegBERT and ALBERT made fewer errors than the other methods in predicting whether words occurred in speculative statements unrelated to abnormal findings. The numbers of false-positive predictions by NegBERT and ALBERT were 4 and 5, respectively. Both were lower than the number of false-positive predictions made by NegEx and NegBio for this prediction task, indicating higher specificity. However, because we grouped all negative and speculative statements not related to abnormal findings into the same category, we could not calculate the exact value of specificity. Both models showed 100% sensitivity in identifying important diagnostic keywords in speculative statements unrelated to abnormal findings, with no false-negative predictions.</p>
          <p>Owing to the complexity of BERT, we could not further analyze the causes of other errors.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <sec>
          <title>Overview</title>
          <p>This study found that 39.45% (4529/11,480) of the important diagnostic keywords occurred in negative or speculative statements unrelated to abnormal findings, posing a challenge for automatic labeling by LISs and information extraction techniques.</p>
          <p>Our study proposes a deep learning method that accurately distinguishes whether diagnostic keywords are in negative or speculative statements unrelated to abnormal findings. Our research has revealed the shortcomings of existing methods, including NegEx, NegBio, and NegBERT, while highlighting the advantages of our proposed approach over these methods.</p>
        </sec>
        <sec>
          <title>Limitation of NegEx and NegBio</title>
          <p>We observed common errors in Spacy’s NegEx and NegBio that the expert rule adjustment could not resolve.</p>
          <p>First, several vital errors in NegEx and NegBio, including errors related to trigger words in NegEx, tokenization errors in NegBio, and symbol-related errors in NegEx and NegBio, were attributed to interference from punctuation and numerals. For example, in the radiology reports in our sample, English sentences were often combined with numbers and punctuation marks and written as numbered or bulleted lists, such as “1.No evidence of aortic dissection.” In addition, using symbols or abbreviations in the form of checklists was also common. For example, “Metastasis (−)” or “Anti-HCV [Negative]” were frequently used. Our results showed that NegEx and NegBio could not handle this issue correctly.</p>
          <p>Second, NegEx and NegBio also caused many errors in the analyses where the simultaneous observation of multiple sentences is required. Our data showed that it is often necessary to examine multiple sentences simultaneously to determine whether speculative statements are associated with abnormal findings. For example, in “No CT evidence of large infarct. Suggest MRI to exclude hyperacute infarct if indicated,” without considering the first sentence, which denies the finding of infarct evidence, it cannot be determined that the “hyperacute infarct” in the second sentence is unrelated to the actual findings. NegEx and NegBio, which are designed to analyze sentences in isolation without considering contextual information, cannot meet this requirement.</p>
          <p>Our results regarding NegEx are consistent with previous research of Wu et al [<xref ref-type="bibr" rid="ref42">42</xref>], highlighting the importance of tuning algorithms such as NegEx to achieve optimal performance in different corpora. Our results also confirm that NegEx produces incorrect results owing to improper negation scope resolution [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
          <p>We found that NegBio requires modifying expert-defined rules to improve its performance. Our study is the first to report NegBio’s limited generalizability in real-world radiology reports across all body parts. We also observed problems with the implementation of NegBio.</p>
        </sec>
        <sec>
          <title>Limitation of NegBERT</title>
          <p>Our experiment showed a significant improvement in NegBERT’s performance after retraining on our hospital data set. The difference in the training data and annotations is likely the reason for the initial poor performance of NegBERT.</p>
          <p>This observation is consistent with previous findings that deep learning models such as BERT tend to perform poorly on out-of-domain corpora. For example, a study by Miller et al [<xref ref-type="bibr" rid="ref39">39</xref>] using RoBERTa for negation detection on both in-domain and out-of-domain corpora observed <italic>F</italic><sub>1</sub>-scores of 0.95 and 0.583, respectively. Our experiment supports this result and shows that the drop in <italic>F</italic><sub>1</sub>-scores can be even worse depending on the corpus and task.</p>
        </sec>
        <sec>
          <title>Advantages of ALBERT and BERT Transformer</title>
          <p>We performed a comparison between the ALBERT and NegBERT methods and made the following key observations.</p>
          <p>First, learning the negation cue and scope in 2 steps provides a limited performance improvement. Our method takes a different approach from NegBERT and traditional negation recognition studies in that our model learns the entire part of the sentence containing both the cue and scope in the same step without explicitly telling the model which word is the “cue” of the negation or speculation. However, the performance was still better than that of the retrained NegBERT. The study by Sergeeva et al [<xref ref-type="bibr" rid="ref11">11</xref>] based on LSTM suggests that the deep learning method can learn negation cue information to some extent automatically, with performance comparable with that of automatic cue prediction algorithms. Our results show that BERT might have a similar capability. Our results suggest that providing additional cue information through expert annotation may not significantly improve performance compared with other factors, such as model selection, hyperparameter optimization, and training techniques.</p>
          <p>Second, our results show that the model size and complexity do not necessarily correlate with improved performance. In our study, the fine-tuned ALBERT model outperformed larger and more complex models, including BERT and XLNet used by NegBERT, as well as RoBERTa used in the study by Miller et al [<xref ref-type="bibr" rid="ref39">39</xref>]. The use of lightweight models, such as ALBERT, may have practical advantages, including reduced computational resource requirements and training time, compared with BERT [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
          <p>In our study, ALBERT and retrained NegBERT outperformed NegEx and NegBio in terms of the number of false-positive predictions and specificity while maintaining 100% sensitivity in predicting whether keywords occurred in speculative sentences unrelated to abnormal findings. This task required multisentence context analysis of our data set, and our results suggest that BERT can look at multiple sentences simultaneously. The attention mechanism is a reasonable explanation for this phenomenon.</p>
        </sec>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Our study fine-tuned the ALBERT model using a more comprehensive data set that included a broader range of imaging modalities and subspecialties than previous studies. <xref ref-type="table" rid="table9">Table 9</xref> shows the best performances and corresponding data sets used in previous studies that detected whether named entities occurred in negation and speculation in radiology reports. The range of imaging modalities and subspecialties represented in the radiology reports in these studies was limited, such as chest x-ray reports only in the study by Peng et al [<xref ref-type="bibr" rid="ref23">23</xref>] or brain CT and MRI reports only in the studies by Grivas et al [<xref ref-type="bibr" rid="ref43">43</xref>] and Sykes et al [<xref ref-type="bibr" rid="ref12">12</xref>]. We hypothesized that including a more diverse set of examination and imaging subspecialties in the data results in a more representative sample of the report content and improves the model’s generalizability. Our results support this hypothesis, as the ALBERT model showed only a 0.034 decrease in its <italic>F</italic><sub>1</sub>-score on an unseen test data set with different disease types and inputs from different physicians.</p>
        <p>Our experiments also address a more difficult speculation-detection task than previous studies; however, ALBERT still demonstrates good performance. This distinction requires the ability of the algorithm to consider multiple sentences simultaneously in our data set. To the best of our knowledge, our study is the first to propose a distinction between speculative sentences related and unrelated to abnormal findings based on the application scenario to facilitate more precise filtering and the first study to highlight the impact of the lack of multisentence analysis in negation detection algorithms.</p>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Comparison of best performances between studies distinguishing whether named entities occurred in negation or speculation.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="130"/>
            <col width="140"/>
            <col width="120"/>
            <col width="90"/>
            <col width="90"/>
            <col width="100"/>
            <col width="90"/>
            <col width="100"/>
            <col width="140"/>
            <thead>
              <tr valign="bottom">
                <td>Study</td>
                <td>Algorithm</td>
                <td>P<sup>a</sup></td>
                <td>R<sup>b</sup></td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>Best<sup>c</sup></td>
                <td>N<sup>d</sup></td>
                <td>Task<sup>e</sup></td>
                <td>Type<sup>f</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Our study</td>
                <td>ALBERT<sup>g</sup></td>
                <td>0.991</td>
                <td>0.992</td>
                <td>0.991</td>
                <td>Test data set</td>
                <td>6000</td>
                <td>ND<sup>h</sup>+S<sup>*i</sup></td>
                <td>All body parts</td>
              </tr>
              <tr valign="top">
                <td>Sykes et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td>
                <td>BiLSTM<sup>j</sup></td>
                <td>0.973</td>
                <td>0.981</td>
                <td>0.977</td>
                <td>ESS<sup>k</sup></td>
                <td>630</td>
                <td>ND+S<sup>l</sup></td>
                <td>Brain CT<sup>m</sup> and MRI<sup>n</sup></td>
              </tr>
              <tr valign="top">
                <td>Peng et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td>
                <td>NegBio</td>
                <td>0.944</td>
                <td>0.944</td>
                <td>0.944</td>
                <td>Chest x-ray</td>
                <td>900</td>
                <td>ND+S</td>
                <td>Chest x-ray</td>
              </tr>
              <tr valign="top">
                <td>Grivas et al [<xref ref-type="bibr" rid="ref43">43</xref>]</td>
                <td>Edie-R</td>
                <td>0.925</td>
                <td>0.943</td>
                <td>0.934</td>
                <td>ESS</td>
                <td>630</td>
                <td>ND+S</td>
                <td>Brain CT and MRI</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table9fn1">
              <p><sup>a</sup>P: precision.</p>
            </fn>
            <fn id="table9fn2">
              <p><sup>b</sup>R: recall.</p>
            </fn>
            <fn id="table9fn3">
              <p><sup>c</sup>Name of the best-performing data set. Other data sets are not included.</p>
            </fn>
            <fn id="table9fn4">
              <p><sup>d</sup>Number of samples in the best-performing data set; other data sets are not included.</p>
            </fn>
            <fn id="table9fn5">
              <p><sup>e</sup>Task performed in the study.</p>
            </fn>
            <fn id="table9fn6">
              <p><sup>f</sup>Types of radiologic studies included in the study.</p>
            </fn>
            <fn id="table9fn7">
              <p><sup>g</sup>ALBERT: A Lite Bidirectional Encoder Representations From Transformers.</p>
            </fn>
            <fn id="table9fn8">
              <p><sup>h</sup>ND: negation detection.</p>
            </fn>
            <fn id="table9fn9">
              <p><sup>i</sup>S<sup>*</sup>: detection of speculation unrelated to abnormal findings.</p>
            </fn>
            <fn id="table9fn10">
              <p><sup>j</sup>BiLSTM: Bidirectional Long Short-Term Memory.</p>
            </fn>
            <fn id="table9fn11">
              <p><sup>k</sup>ESS: Edinburgh Stroke Study.</p>
            </fn>
            <fn id="table9fn12">
              <p><sup>l</sup>S: speculation detection.</p>
            </fn>
            <fn id="table9fn13">
              <p><sup>m</sup>CT: computed tomography.</p>
            </fn>
            <fn id="table9fn14">
              <p><sup>n</sup>MRI: magnetic resonance imaging.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Implication in Clinical Practice</title>
        <p>We found problems with NegEx and NegBio that could not be solved by modifying expert-defined rules, including difficulties with numbers and punctuation, implementation-specific challenges, and the design constraint of observing only a single sentence at a time; thus, NegEx and NegBio should be used cautiously or avoided in such situations to prevent errors. On the basis of our data, we also found that NegBio and NegBERT have limitations in generalizability, making them inappropriate for use without training or modeling.</p>
        <p>Our results indicate that BERT is more suitable than NegEx and NegBio for tasks involving multisentence context analysis, similar to the experiment conducted in this study. NegEx and NegBio were designed for single-sentence analysis because they segmented the text into independent sentences. This approach limits the ability to incorporate contextual information from other sentences into the analysis. While NegEx and NegBio can perform binary classification of words in sentences as speculative or not, they lack the capacity for further granular differentiation based on contextual information.</p>
        <p>We found that the training process of the transformers did not require 2 separate learning phases for cue and scope. Our findings could reduce the workload of expert annotation in clinical applications, as the explicit annotation of cues in a separate step requires additional work. This hypothesis needs further testing in future studies.</p>
        <p>Our results show that deep learning models outperform non–deep learning methods, and lightweight models such as ALBERT can achieve superior performance and outperform other transformer-based models. However, fine-tuning based on the specific domain corpus and task is still essential regardless of the model used.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The data were obtained from 3 internal branches of a single institution and not from publicly available data sets. In addition, the speculation-detection task differed from previous studies in this area. The comparability of the performance with that of previous studies may be limited. If open data using the same annotation methodology become available, subsequent research could verify our findings by implementing the same model on the open data set.</p>
        <p>Our study optimized the control methods (NegEx and NegBio), but we cannot exclude the possibility of further performance improvement by modifying or adding expert rules. However, this highlights the limitations of an expert rule–based approach, which requires experts not only to detect negations and speculations but also to summarize and modify rules manually. Moreover, expert rules cannot resolve the algorithmic design or implementation constraints.</p>
        <p>To prevent the deep learning model from training failure, we combined negative statements with speculative statements unrelated to abnormal findings in the same category because of the low proportion of the latter. As a result, we cannot separately evaluate the model’s performance on negative and speculative sentences unrelated to abnormal findings or accurately quantify the latter’s performance. Nevertheless, metrics such as the number of false-positive predictions can still be used to compare the performance between methods.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Manual free-text reporting remains the norm in radiology worldwide, hampering the ability to perform computer-assisted analyses. The presence of information irrelevant to the actual findings poses a significant challenge to the implementation of automatic radiology report highlighting, flagging, or information extraction.</p>
        <p>Previous research on negation and speculation detection in radiology has aimed to identify all instances. Our study advances this by targeting only speculative statements unrelated to abnormal findings and improving the discrimination of relevant information using BERT’s multisentence contextual analysis capabilities.</p>
        <p>Lightweight transformer models, such as ALBERT, can outperform NegEx, NegBio, and NegBERT on more complex and diverse real-world radiology reports. Despite achieving good results on public data sets, NegBio and NegBERT demonstrated different performances on more complicated real-world radiology reports.</p>
        <p>Our research has potential applications in academia and clinical practice. Future studies may consider including lightweight models such as ALBERT. In clinical practice, our method achieved high performance. It can help algorithms such as keyword highlighting in hospital information systems to identify passages of potentially important information without false alarms, improving physician efficiency and health care quality. Our results also apply to radiology report information retrieval, such as search engines, in which negative and speculative statements unrelated to abnormalities can lead to incorrect results.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ALBERT</term>
          <def>
            <p>A Lite Bidirectional Encoder Representations From Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations From Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BiLSTM</term>
          <def>
            <p>Bidirectional Long Short-Term Memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CRF</term>
          <def>
            <p>Conditional Random Field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CT</term>
          <def>
            <p>computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">DeBERTa</term>
          <def>
            <p>Decoding-Enhanced Bidirectional Encoder Representations From Transformers With Disentangled Attention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">DEEPEN</term>
          <def>
            <p>Dependency Parser Negation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">DistilBERT</term>
          <def>
            <p>Distilled version of Bidirectional Encoder Representations From Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ELECTRA</term>
          <def>
            <p>Efficiently Learning an Encoder That Classifies Token Replacements Accurately</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ERNIE</term>
          <def>
            <p>Enhanced Representation through Knowledge Integration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">LIS</term>
          <def>
            <p>laboratory information system</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">LSTM</term>
          <def>
            <p>Long Short-Term Memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">MRI</term>
          <def>
            <p>magnetic resonance imaging</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">RoBERTa</term>
          <def>
            <p>Robustly Optimized Bidirectional Encoder Representations From Transformers Pretraining Approach</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research received no specific grants from any funding agency in the public, commercial, or not-for-profit sectors.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>KHW proposed the research topic and experimental design and completed the recruitment, data analysis, computer programming, and writing of the entire paper.</p>
        <p>CFL contributed to research design improvement and manuscript proofreading.</p>
        <p>CJC performed big medical data exporting and cleaning and manuscript proofreading.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lacson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Prevedello</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Andriole</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gandhi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dalal</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Sato</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Khorasani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Four-year impact of an alert notification system on closed-loop communication of critical test results</article-title>
          <source>AJR Am J Roentgenol</source>
          <year>2014</year>
          <month>12</month>
          <volume>203</volume>
          <issue>5</issue>
          <fpage>933</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25341129"/>
          </comment>
          <pub-id pub-id-type="doi">10.2214/AJR.14.13064</pub-id>
          <pub-id pub-id-type="medline">25341129</pub-id>
          <pub-id pub-id-type="pmcid">PMC4426858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ignácio</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>de Souza</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>D'Ippolito</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Radiology report: what is the opinion of the referring physician?</article-title>
          <source>Radiol Bras</source>
          <year>2018</year>
          <month>09</month>
          <volume>51</volume>
          <issue>5</issue>
          <fpage>308</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30369658"/>
          </comment>
          <pub-id pub-id-type="doi">10.1590/0100-3984.2017.0115</pub-id>
          <pub-id pub-id-type="medline">30369658</pub-id>
          <pub-id pub-id-type="pmcid">PMC6198844</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reda</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Hashem</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Khashoggi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Abukhodair</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Clinicians' behavior toward radiology reports: a cross-sectional study</article-title>
          <source>Cureus</source>
          <year>2020</year>
          <month>11</month>
          <day>05</day>
          <volume>12</volume>
          <issue>11</issue>
          <fpage>e11336</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33304672"/>
          </comment>
          <pub-id pub-id-type="doi">10.7759/cureus.11336</pub-id>
          <pub-id pub-id-type="medline">33304672</pub-id>
          <pub-id pub-id-type="pmcid">PMC7719475</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>European Society of Radiology (ESR)</collab>
          </person-group>
          <article-title>ESR guidelines for the communication of urgent and unexpected findings</article-title>
          <source>Insights Imaging</source>
          <year>2012</year>
          <month>02</month>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22695992"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13244-011-0135-y</pub-id>
          <pub-id pub-id-type="medline">22695992</pub-id>
          <pub-id pub-id-type="pmcid">PMC3292650</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nakamura</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hanaoka</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nomura</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nakao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Miki</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Watadani</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshikawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hayashi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Abe</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Automatic detection of actionable radiology reports using bidirectional encoder representations from transformers</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2021</year>
          <month>09</month>
          <day>11</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>262</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-021-01623-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-021-01623-6</pub-id>
          <pub-id pub-id-type="medline">34511100</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-021-01623-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8436473</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perrotta</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Karcher</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>Validating laboratory results in electronic health records: a College of American Pathologists Q-Probes study</article-title>
          <source>Arch Pathol Lab Med</source>
          <year>2016</year>
          <month>09</month>
          <volume>140</volume>
          <issue>9</issue>
          <fpage>926</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27575266"/>
          </comment>
          <pub-id pub-id-type="doi">10.5858/arpa.2015-0320-CP</pub-id>
          <pub-id pub-id-type="medline">27575266</pub-id>
          <pub-id pub-id-type="pmcid">PMC5513146</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Srinivasa Babu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>The malpractice liability of radiology reports: minimizing the risk</article-title>
          <source>Radiographics</source>
          <year>2015</year>
          <month>03</month>
          <volume>35</volume>
          <issue>2</issue>
          <fpage>547</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.1148/rg.352140046</pub-id>
          <pub-id pub-id-type="medline">25763738</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruskin</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hueske-Kraus</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Alarm fatigue: impacts on patient safety</article-title>
          <source>Curr Opin Anaesthesiol</source>
          <year>2015</year>
          <month>12</month>
          <volume>28</volume>
          <issue>6</issue>
          <fpage>685</fpage>
          <lpage>90</lpage>
          <pub-id pub-id-type="doi">10.1097/ACO.0000000000000260</pub-id>
          <pub-id pub-id-type="medline">26539788</pub-id>
          <pub-id pub-id-type="pii">00001503-201512000-00013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazib</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Negation scope detection with recurrent neural networks models in review texts</article-title>
          <source>Proceedings of the 2nd International Conference of Young Computer Scientists, Engineers and Educators: Social Computing</source>
          <year>2016</year>
          <conf-name>ICYCSEE' 16</conf-name>
          <conf-date>August 20-22, 2016</conf-date>
          <conf-loc>Harbin, China</conf-loc>
          <fpage>494</fpage>
          <lpage>508</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Harabagiu</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>The role of a deep-learning method for negation detection in patient cohort identification from electroencephalography reports</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2018</year>
          <month>12</month>
          <day>05</day>
          <volume>2018</volume>
          <fpage>1018</fpage>
          <lpage>27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30815145"/>
          </comment>
          <pub-id pub-id-type="medline">30815145</pub-id>
          <pub-id pub-id-type="pmcid">PMC6371289</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sergeeva</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Prinsen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Tahmasebi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Negation scope detection in clinical notes and scientific abstracts: a feature-enriched LSTM-based approach</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2019</year>
          <month>05</month>
          <day>06</day>
          <volume>2019</volume>
          <fpage>212</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31258973"/>
          </comment>
          <pub-id pub-id-type="medline">31258973</pub-id>
          <pub-id pub-id-type="pmcid">PMC6568093</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sykes</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Grivas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Grover</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tobin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sudlow</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Whiteley</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Mcintosh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Whalley</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Alex</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Comparison of rule-based and neural network models for negation detection in radiology reports</article-title>
          <source>Nat Lang Eng</source>
          <year>2021</year>
          <month>3</month>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>203</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cambridge.org/core/journals/natural-language-engineering/article/abs/comparison-of-rulebased-and-neural-network-models-for-negation-detection-in-radiology-reports/9A7B3868603C408A7DC67D74DB240B7D#"/>
          </comment>
          <pub-id pub-id-type="doi">10.1017/s1351324920000509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khandelwal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Britto</surname>
              <given-names>BK</given-names>
            </name>
          </person-group>
          <article-title>Multitask learning of negation and speculation using transformers</article-title>
          <source>Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis</source>
          <year>2020</year>
          <conf-name>Louhi' 20</conf-name>
          <conf-date>November 20, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>79</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.louhi-1.9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fancellu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Webber</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Neural networks for negation scope detection</article-title>
          <source>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2016</year>
          <conf-name>ACL' 16</conf-name>
          <conf-date>August 7-12, 2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <fpage>495</fpage>
          <lpage>504</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/P16-1047</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gautam</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Maharjan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Banjade</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tamang</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rus</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Long short term memory based models for negation handling in tutorial dialogues</article-title>
          <source>Proceedings of the 31st International Florida Artificial Intelligence Research Society Conference</source>
          <year>2018</year>
          <conf-name>FLAIRS' 18</conf-name>
          <conf-date>May 21-23, 2018</conf-date>
          <conf-loc>Melbourne, FL, USA</conf-loc>
          <fpage>14</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.13140/RG.2.2.26250.36804</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khandelwal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sawant</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>NegBERT: a transfer learning approach for negation detection and scope resolution</article-title>
          <source>Proceedings of the 12th Language Resources and Evaluation Conference</source>
          <year>2020</year>
          <conf-name>LREC' 20</conf-name>
          <conf-date>May 11-16, 2020</conf-date>
          <conf-loc>Marseille, France</conf-loc>
          <fpage>5739</fpage>
          <lpage>48</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.lrec-1.704.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Speculation and negation scope detection via convolutional neural networks</article-title>
          <source>Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2016</year>
          <month>11</month>
          <conf-name>EMNLP' 16</conf-name>
          <conf-date>November 1-5, 2016</conf-date>
          <conf-loc>Austin, TX, USA</conf-loc>
          <fpage>815</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/D16-1078</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Bridewell</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hanbury</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>GF</given-names>
            </name>
            <name name-style="western">
              <surname>Buchanan</surname>
              <given-names>BG</given-names>
            </name>
          </person-group>
          <article-title>A simple algorithm for identifying negated findings and diseases in discharge summaries</article-title>
          <source>J Biomed Inform</source>
          <year>2001</year>
          <month>10</month>
          <volume>34</volume>
          <issue>5</issue>
          <fpage>301</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(01)91029-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1006/jbin.2001.1029</pub-id>
          <pub-id pub-id-type="medline">12123149</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(01)91029-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mutalik</surname>
              <given-names>PG</given-names>
            </name>
            <name name-style="western">
              <surname>Deshpande</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>PM</given-names>
            </name>
          </person-group>
          <article-title>Use of general-purpose negation detection to augment concept indexing of medical documents: a quantitative study using the UMLS</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2001</year>
          <month>11</month>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>598</fpage>
          <lpage>609</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/11687566"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2001.0080598</pub-id>
          <pub-id pub-id-type="medline">11687566</pub-id>
          <pub-id pub-id-type="pmcid">PMC130070</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gindl</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Miksch</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Syntactical negation detection in clinical practice guidelines</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2008</year>
          <volume>136</volume>
          <fpage>187</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18487729"/>
          </comment>
          <pub-id pub-id-type="medline">18487729</pub-id>
          <pub-id pub-id-type="pmcid">PMC2855380</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronow</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Fangfang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Croft</surname>
              <given-names>WB</given-names>
            </name>
          </person-group>
          <article-title>Ad hoc classification of radiology reports</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>1999</year>
          <month>09</month>
          <volume>6</volume>
          <issue>5</issue>
          <fpage>393</fpage>
          <lpage>411</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/10495099"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.1999.0060393</pub-id>
          <pub-id pub-id-type="medline">10495099</pub-id>
          <pub-id pub-id-type="pmcid">PMC61382</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Krishnan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roch</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kesterson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Beesley</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dexter</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Max Schmidt</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Palakal</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>DEEPEN: a negation detection system for clinical text incorporating dependency relation into NegEx</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>04</month>
          <volume>54</volume>
          <fpage>213</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00043-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.02.010</pub-id>
          <pub-id pub-id-type="medline">25791500</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00043-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC5863758</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bagheri</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Summers</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>NegBio: a high-performance tool for negation and uncertainty detection in radiology reports</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2018</year>
          <month>05</month>
          <day>18</day>
          <volume>2017</volume>
          <fpage>188</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29888070"/>
          </comment>
          <pub-id pub-id-type="medline">29888070</pub-id>
          <pub-id pub-id-type="pmcid">PMC5961822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Medlock</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Briscoe</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Weakly supervised learning for hedge classification in scientific literature</article-title>
          <source>Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics</source>
          <year>2007</year>
          <conf-name>ACL '07</conf-name>
          <conf-date>June 25-27, 2007</conf-date>
          <conf-loc>Prague, Czech Republic</conf-loc>
          <fpage>992</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P07-1125.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rokach</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Romano</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Maimon</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Negation recognition in medical narrative reports</article-title>
          <source>Inf Retr</source>
          <year>2008</year>
          <month>06</month>
          <day>07</day>
          <volume>11</volume>
          <issue>6</issue>
          <fpage>499</fpage>
          <lpage>538</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://link.springer.com/article/10.1007/s10791-008-9061-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10791-008-9061-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morante</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liekens</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Daelemans</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Learning the scope of negation in biomedical texts</article-title>
          <source>Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2008</year>
          <conf-name>EMNLP '08</conf-name>
          <conf-date>October 25-27, 2008</conf-date>
          <conf-loc>Honolulu, HI, USA</conf-loc>
          <fpage>715</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D08-1075.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/1613715.1613805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morante</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Daelemans</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>A metalearning approach to processing the scope of negation</article-title>
          <source>Proceedings of the 13th Conference on Computational Natural Language Learning</source>
          <year>2009</year>
          <conf-name>CoNLL '09</conf-name>
          <conf-date>June 4-5, 2009</conf-date>
          <conf-loc>Boulder, CO, USA</conf-loc>
          <fpage>21</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.3115/1596374.1596381</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Patrick</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Automatic negation detection in narrative pathology reports</article-title>
          <source>Artif Intell Med</source>
          <year>2015</year>
          <month>05</month>
          <volume>64</volume>
          <issue>1</issue>
          <fpage>41</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2015.03.001</pub-id>
          <pub-id pub-id-type="medline">25990897</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(15)00015-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2019</year>
          <conf-name>NAACL '19</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN, USA</conf-loc>
          <fpage>4171</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rivera Zavala</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The impact of pretrained language models on negation and speculation detection in cross-lingual medical text: comparative study</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>12</month>
          <day>03</day>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e18953</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/12/e18953/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18953</pub-id>
          <pub-id pub-id-type="medline">33270027</pub-id>
          <pub-id pub-id-type="pii">v8i12e18953</pub-id>
          <pub-id pub-id-type="pmcid">PMC7746498</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gimpel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Soricut</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ALBERT: a lite BERT for self-supervised learning of language representations</article-title>
          <source>Proceedings of the 2020 International Conference on Learning Representations</source>
          <year>2020</year>
          <conf-name>ICLR '20</conf-name>
          <conf-date>April 26-30, 2020</conf-date>
          <conf-loc>Addis Ababa, Ethiopia</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/pdf?id=H1eA7AEtvS"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sanh</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Debut</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chaumond</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter</article-title>
          <source>Proceedings of the 2019 Conference on Neural Information Processing Systems</source>
          <year>2019</year>
          <conf-name>NeurIPS '19</conf-name>
          <conf-date>December 8-14, 2019</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carbonell</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
          </person-group>
          <article-title>XLNet: generalized autoregressive pretraining for language understanding</article-title>
          <source>Proceedings of the 33rd International Conference on Neural Information Processing Systems</source>
          <year>2019</year>
          <conf-name>NIPS '19</conf-name>
          <conf-date>December 8-14, 2019</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <fpage>5753</fpage>
          <lpage>63</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3454287.3454804"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: a robustly optimized BERT pretraining approach</article-title>
          <source>Proceedings of the 2020 International Conference on Learning Representations</source>
          <year>2020</year>
          <conf-name>ICLR '20</conf-name>
          <conf-date>April 26-30, 2020</conf-date>
          <conf-loc>Addis Ababa, Ethiopia</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/attachment?id=SyxS0T4tvS&#38;name=original_pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>ERNIE: enhanced representation through knowledge integration</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on April 19, 2019</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1904.09223</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Weld</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>SpanBERT: improving pre-training by representing and predicting spans</article-title>
          <source>Trans Assoc Comput Linguist</source>
          <year>2020</year>
          <month>12</month>
          <volume>8</volume>
          <fpage>64</fpage>
          <lpage>77</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.tacl-1.5.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>DeBERTa: decoding-enhanced BERT with disentangled attention</article-title>
          <source>Proceedings of the 2021 International Conference on Learning Representations</source>
          <year>2021</year>
          <conf-name>ICLR '21</conf-name>
          <conf-date>May 3-7, 2021</conf-date>
          <conf-loc>Virtual</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/pdf?id=XPZIaotutsD"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Luong</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>ELECTRA: pre-training text encoders as discriminators rather than generators</article-title>
          <source>Proceedings of the 8th International Conference on Learning Representations</source>
          <year>2020</year>
          <conf-name>ICLR '20</conf-name>
          <conf-date>April 26-May 1, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>1</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/pdf?id=r1xMH1BtvB"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Laparra</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bethard</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Domain adaptation in practice: lessons from a real-world information extraction pipeline</article-title>
          <source>Proceedings of the 2nd Workshop on Domain Adaptation for NLP</source>
          <year>2021</year>
          <month>04</month>
          <day>20</day>
          <conf-name>AdaptNLP '21</conf-name>
          <conf-date>April 20, 2021</conf-date>
          <conf-loc>Kyiv, Ukraine</conf-loc>
          <fpage>105</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2021.adaptnlp-1.11.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hiroki</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Takahiro</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Junya</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yasufumi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>doccano: text annotation tool for human</article-title>
          <source>GitHub</source>
          <year>2018</year>
          <access-date>2022-11-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/doccano/doccano">https://github.com/doccano/doccano</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Honnibal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montani</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Van Landeghem</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>spaCy: industrial-strength natural language processing in Python</article-title>
          <source>spaCy</source>
          <year>2020</year>
          <access-date>2022-11-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://spacy.io">https://spacy.io</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Coarr</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Halgrim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Carrell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Negation's not solved: generalizability versus optimizability in clinical natural language processing</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <month>11</month>
          <day>13</day>
          <volume>9</volume>
          <issue>11</issue>
          <fpage>e112774</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0112774"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0112774</pub-id>
          <pub-id pub-id-type="medline">25393544</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-09493</pub-id>
          <pub-id pub-id-type="pmcid">PMC4231086</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grivas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alex</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grover</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tobin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Whiteley</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Not a cute stroke: analysis of rule- and neural network-based information extraction systems for brain radiology reports</article-title>
          <source>Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis</source>
          <year>2020</year>
          <conf-name>Louhi '20</conf-name>
          <conf-date>November 20, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>24</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.louhi-1.4.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.louhi-1.4</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
