<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i9e37812</article-id>
      <article-id pub-id-type="pmid">36099001</article-id>
      <article-id pub-id-type="doi">10.2196/37812</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Mining Severe Drug Hypersensitivity Reaction Cases in Pediatric Electronic Health Records: Methodology Development and Applications</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Luo</surname>
            <given-names>Jake</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>Lichin</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Yuncui</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1559-9752</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Qiuye</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0662-3415</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Cao</surname>
            <given-names>Wang</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3967-9244</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Xiaochuan</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9158-7959</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Yanming</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8605-682X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Xie</surname>
            <given-names>Yuefeng</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3914-3730</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Xiaoling</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>National Center for Children's Health</institution>
            <institution>Beijing Children’s Hospital</institution>
            <institution>Capital Medical University</institution>
            <addr-line>56 Nanlishi Road</addr-line>
            <addr-line>Xicheng District</addr-line>
            <addr-line>Beijing, 100045</addr-line>
            <country>China</country>
            <phone>86 59617173</phone>
            <fax>86 59616083</fax>
            <email>wangxiaoling@bch.com.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2136-7410</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>National Center for Children's Health</institution>
        <institution>Beijing Children’s Hospital</institution>
        <institution>Capital Medical University</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Bohui Yishu (Beijing) Co, Ltd</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Xiaoling Wang <email>wangxiaoling@bch.com.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>9</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>13</day>
        <month>9</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>9</issue>
      <elocation-id>e37812</elocation-id>
      <history>
        <date date-type="received">
          <day>8</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>4</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>12</day>
          <month>8</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Yuncui Yu, Qiuye Zhao, Wang Cao, Xiaochuan Wang, Yanming Li, Yuefeng Xie, Xiaoling Wang. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 13.09.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/9/e37812" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Severe drug hypersensitivity reactions (DHRs) refer to allergic reactions caused by drugs and usually present with severe skin rashes and internal damage as the main symptoms. Reporting of severe DHRs in hospitals now solely occurs through spontaneous reporting systems (SRSs), which clinicians in charge operate. An automatic identification system scrutinizes clinical notes and reports potential severe DHR cases.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The goal of the research was to develop an automatic identification system for mining severe DHR cases and to discover more DHR cases for further study. The proposed method was applied to 9 years of data in the pediatric electronic health records (EHRs) of Beijing Children’s Hospital.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The phenotyping task was approached as a document classification problem. A DHR data set containing tagged documents for training was prepared. In this data set, each document contains all the clinical notes generated during 1 inpatient visit. Document-level tags correspond to DHR types and a negative category. Strategies were evaluated for long document classification on the openly available National NLP Clinical Challenges 2016 smoking task. Four strategies were evaluated in this work: document truncation, hierarchy representation, efficient self-attention, and key sentence selection. In-domain and open-domain pretrained embeddings were evaluated on the DHR data set. An automatic grid search was performed to tune statistical classifiers for the best performance over the transformed data. Inference efficiency and memory requirements of the best performing models were analyzed. The most efficient model was then run to mine DHR cases from millions of documents in the EHR system.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>For long document classification, key sentence selection with guideline keywords achieved the best performance and was 9 times faster than hierarchy representation models for inference. The best model discovered 1155 DHR cases in Beijing Children’s Hospital EHR system. After double-checking by clinician experts, 357 cases of severe DHRs were finally identified. For the smoking challenge, our model reached the record of state-of-the-art performance (94.1% vs 94.2%).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The proposed method discovered 357 positive DHR cases from a large archive of EHR records, about 90% of which were missed by SRSs. SRSs reported only 36 cases during the same period. The case analysis also found more suspected drugs associated with severe DHRs in pediatrics.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>drug hypersensitivity reactions</kwd>
        <kwd>electronic health records</kwd>
        <kwd>clinical notes</kwd>
        <kwd>phenotyping</kwd>
        <kwd>natural language processing</kwd>
        <kwd>medical language processing</kwd>
        <kwd>bidirectional encoder representation from transformers</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Drug hypersensitivity reactions (DHRs) are a type of adverse drug reaction that clinically resembles allergy. DHRs affect more than 7% of the population and are a significant cause of the postmarketing withdrawal of drugs [<xref ref-type="bibr" rid="ref1">1</xref>]. Severe DHRs, such as anaphylactic shock, drug-induced hypersensitivity syndrome, Stevens-Johnson syndrome, and epidermolysis bullosa, have been observed worldwide with an annual incidence of 0.05 to 3 persons per million population. With mortality rates varying from 5% to 30%, severe DHRs in pediatric populations, including children, infants, and even newborns, comprise 10% to 20% of reported cases [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      <p>Reporting of severe DHRs in hospitals now solely occurs through spontaneous reporting systems (SRSs), which clinicians in charge operate. Previous studies showed that only 10% to 30% of severe adverse drug reactions were reported in SRSs [<xref ref-type="bibr" rid="ref4">4</xref>]. Even though the missed cases were properly handled and simply not logged into the SRS system, a more thorough report would have helped improve drug guidelines. Recently, routinely collected medical data such as electronic health records (EHRs) are increasingly being used to complement the SRS and enable active pharmacovigilance. EHR systems contain detailed data with timestamps for admissions, discharges, diagnoses, medications, and laboratory tests. However, severe DHRs rely on symptoms and signs for detection, which in turn often reside in the free-text areas of EHRs and require the use of natural language processing to extract information.</p>
      <p>One of the most well-studied medical language processing applications is phenotyping (eg, the automatic evaluation of phenomics traits such as smoking status) [<xref ref-type="bibr" rid="ref5">5</xref>]. Automatic identification of severe DHRs in patients can also be explored as a phenotyping task. When no structural data are available, the phenotyping of clinical notes can be formulated as a document classification task, which has been well studied in the natural language processing field.</p>
      <p>Recent work [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>] has reported that clinical documents are too long for contextualized language models to process. Our research group has integrated the medical data from a hospital and established a vertical data warehouse in its early stage. Unlike previous works that only process discharge summaries [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>], this DHR task deals with documents consisting of all clinical notes associated with 1 inpatient visit. The average length of discharge summaries is typically hundreds of words. However, in this DHR data set, the average document length is up to several thousand Chinese characters, and some documents contain tens of thousands of Chinese characters. Therefore, picking the best strategy for long document classification is crucial for achieving our objective.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Pipeline Design</title>
        <p>This work approaches the automatic identification of DHR cases as a long document classification problem. For training purposes, domain experts prepared a corpus containing document-level tags.</p>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> demonstrates the proposed system pipeline. First, 4 strategies for long document classification were compared and evaluated on the openly available smoking task. Second, the best strategy was applied to the DHR task. The pretrained embedding models of Chinese medical text were compared and evaluated on our own DHR task. A grid search was performed to tune machine learning classifiers for the best document classification performance on the DHR data set. Finally, the best pipeline was applied to 9 years of data in a pediatric EHR.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Proposed system pipeline in this study. DHR: drug hypersensitivity reaction; EHR: electronic health record.</p>
          </caption>
          <graphic xlink:href="medinform_v10i9e37812_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>The study was reviewed and approved (2019-k-5) by the Institutional Ethics Committee of Beijing Children’s Hospital in China, with a waiver of informed consent.</p>
      </sec>
      <sec>
        <title>Data Set and Metrics</title>
        <sec>
          <title>Smoking Task</title>
          <p>The smoking challenge [<xref ref-type="bibr" rid="ref5">5</xref>] automatically determines patients’ smoking status from their discharge summaries. The 502 discharge summaries present 5 statuses: past smoker, current smoker, smoker, nonsmoker, and unknown. Following previous work, the class smoker was ignored. <xref ref-type="table" rid="table1">Table 1</xref> shows the training and test data distribution.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>The training and test data distribution of the smoking task.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="210"/>
              <col width="160"/>
              <col width="200"/>
              <col width="170"/>
              <col width="150"/>
              <col width="110"/>
              <thead>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Past smoker</td>
                  <td>Current smoker</td>
                  <td>Nonsmoker</td>
                  <td>Unknown</td>
                  <td>Total</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Train data set</td>
                  <td>36</td>
                  <td>35</td>
                  <td>66</td>
                  <td>252</td>
                  <td>389</td>
                </tr>
                <tr valign="top">
                  <td>Test data set</td>
                  <td>11</td>
                  <td>11</td>
                  <td>16</td>
                  <td>63</td>
                  <td>101</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Severe DHR Task</title>
          <sec>
            <title>Data Source</title>
            <p>Beijing Children’s Hospital’s information system allows for a patient’s history and physician notes to be digitally recorded and instantaneously available via the network to all patient departments. A vertical data warehouse was built based on the integration of medical data in the early stage. It contains 431,972 hospitalization records of 315,608 patients from January 1, 2012, to December 31, 2020, including detailed diagnostic information, medication information, laboratory tests, disease course data, etc. Among them, a hospitalization record represents a hospitalization process. If a patient is hospitalized multiple times, the same patient will have multiple hospitalization records.</p>
          </sec>
          <sec>
            <title>Corpus Construction</title>
            <p>Positive cases that present severe DHRs were collected from 2 pools: the 31 positive cases logged to National Medical Products Administration reporting system and the 183 positive cases discovered by chart review. After deduplication, 200 positive cases were collected. Each positive case was assigned 1 of 4 subcategories. Furthermore, 400 negative cases were randomly sampled from Beijing Children’s Hospital’s EHR system. These cases were assigned a negative (NEG) tag and hand-checked by physicians to ensure they did not present severe DHRs.</p>
            <p>The definitions of the 4 subtypes of severe DHR are shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> as found in the Guidelines for Medical Nomenclature Use of Adverse Drug Reactions issued by the Center for Drug Reevaluation of the China National Medical Products Administration in 2016 [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
          </sec>
          <sec>
            <title>Training and Test Data Set</title>
            <p>These 5 categories of documents were randomly sampled into the training and test data sets. The training and test data distribution is shown in <xref ref-type="table" rid="table2">Table 2</xref>. The positive and negative ratio is close to the corresponding ratio in the smoking task.</p>
            <table-wrap position="float" id="table2">
              <label>Table 2</label>
              <caption>
                <p>The training and test data distribution of the severe drug hypersensitivity reaction data set.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="270"/>
                <col width="100"/>
                <col width="130"/>
                <col width="110"/>
                <col width="120"/>
                <col width="150"/>
                <col width="120"/>
                <thead>
                  <tr valign="bottom">
                    <td>
                      <break/>
                    </td>
                    <td>SJS<sup>a</sup></td>
                    <td>DIHS<sup>b</sup></td>
                    <td>AS<sup>c</sup></td>
                    <td>EB<sup>d</sup></td>
                    <td>NEG<sup>e</sup></td>
                    <td>Total</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Training data set</td>
                    <td>56</td>
                    <td>44</td>
                    <td>18</td>
                    <td>32</td>
                    <td>323</td>
                    <td>473</td>
                  </tr>
                  <tr valign="top">
                    <td>Test data set</td>
                    <td>18</td>
                    <td>3</td>
                    <td>5</td>
                    <td>7</td>
                    <td>77</td>
                    <td>110</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table2fn1">
                  <p><sup>a</sup>SJS: Stevens-Johnson syndrome.</p>
                </fn>
                <fn id="table2fn2">
                  <p><sup>b</sup>DIHS: drug-induced hypersensitivity syndrome.</p>
                </fn>
                <fn id="table2fn3">
                  <p><sup>c</sup>AS: anaphylactic shock.</p>
                </fn>
                <fn id="table2fn4">
                  <p><sup>d</sup>EB: epidermolysis bullosa.</p>
                </fn>
                <fn id="table2fn5">
                  <p><sup>e</sup>NEG: negative.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
        </sec>
        <sec>
          <title>Evaluation Metrics</title>
          <p>The micro-averaged F1 score was used to evaluate the performance of different models, following a previous study [<xref ref-type="bibr" rid="ref6">6</xref>]. This metric is used for multiclass classification problems, measuring a balance between precision and recall and giving equal weights to each category.</p>
        </sec>
      </sec>
      <sec>
        <title>Strategies for Long Document Classification</title>
        <p>Four strategies were evaluated and compared: document truncation [<xref ref-type="bibr" rid="ref10">10</xref>], hierarchy representation [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], more efficient self-attention [<xref ref-type="bibr" rid="ref12">12</xref>], and key sentence selection [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. The best strategy for long document classification was selected based on the results of the openly available National NLP Clinical Challenges 2016 smoking task [<xref ref-type="bibr" rid="ref5">5</xref>]. Because this task is openly available, its results can be compared more fairly with those of other related works.</p>
        <sec>
          <title>Document Truncation</title>
          <p>The most straightforward way to apply a transformer model with a length limit is to truncate the input and pick the first block of tokens. These models typically require a length limit of 512 words.</p>
        </sec>
        <sec>
          <title>More Efficient Self-Attention</title>
          <p>Self-attention models, such as bidirectional encoder representation from transformer (BERT), require quadratic computational time and space with respect to the input sequence length. The Longformer model uses sparse self-attention instead of full self-attention to process longer documents (up to 4096 tokens).</p>
        </sec>
        <sec>
          <title>Hierarchy Representation</title>
          <p>In a hierarchy approach, sentence representations are built first and then aggregated into a document-level representation. In previous work on the phenotyping task of clinical notes, document representation is built by a sampling layer on top of the BERT blocks of each sentence [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        </sec>
        <sec>
          <title>Key Sentence Selection</title>
          <p>A few key sentences could be enough for the document classification task. In previous works, unsupervised methods were explored to generate key sentences, which did not always perform well [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. In this work, the keywords extracted from the task-specific guidelines were explored. The sentences containing keywords were selected as key sentences.</p>
          <p>For the smoking task, unigrams and bigrams from previous work were taken as the keyword list: cigarette, smoke, smoked, smoker, smokes, smoking, tobacco [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
          <p>For the DHR task, 2 sets of keywords were evaluated and compared. As an unsupervised method, the term frequency-inverse document frequency (TF-IDF) algorithm was used to compute the top 2000 feature words. Those containing numbers, foreign alphabets, or special characters were removed from these 2000 words. A total of 163 feature words with a score higher than zero were added to the keyword list.</p>
          <p>The parts of the clinical notes that make references to the corresponding guidelines are most relevant for differential classification. Each positive category in the DHR data set is well defined in the corresponding guideline [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. Medical terms were hand-picked from the guidelines. No domain knowledge was required to distinguish medical terms from general text. These keywords are shown in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> in Chinese and <xref ref-type="boxed-text" rid="box2">Textbox 2</xref> in English.</p>
          <boxed-text id="box1" position="float">
            <title>The guideline keywords for the severe drug hypersensitivity reaction task in Chinese. AS: anaphylactic shock; DIHS: drug-induced hypersensitivity syndrome; EB: epidermolysis bullosa; IVIG: intravenous immunoglobulin; SJS: Stevens-Johnson syndrome; TEN: toxic epidermal necrolysis.</title>
            <list list-type="order">
              <list-item>
                <p>Stevens-Johnson综合征, 过敏性休克, 药物超敏反应综合征, 大疱表皮松解症, AS, EB, TEN, SJS, DIHS</p>
              </list-item>
              <list-item>
                <p>过敏，超敏，黏膜，红斑，松解，喘鸣，支气管痉挛，发绀，呼气流量峰值下降，肌张力减退，荨麻疹，血管性水肿，紫绀，低血容量性低血压，斑疹，斑丘疹，无菌性脓疱，紫癜，剥脱性皮炎，融合成片，松弛性水疱，表皮松解，大疱，表皮剥脱，叶状鳞屑，表皮剥离，猩红热样，麻疹样，弥漫性，黏膜侵蚀，大疱</p>
              </list-item>
              <list-item>
                <p>糖皮质激素，肾上腺素，甲基泼尼松龙，泼尼松，地塞米松， IVIG，甲泼尼龙</p>
              </list-item>
            </list>
          </boxed-text>
          <boxed-text id="box2" position="float">
            <title>The guideline keywords for the severe drug hypersensitivity reaction task in English. AS: anaphylactic shock; DIHS: drug-induced hypersensitivity syndrome; EB: epidermolysis bullosa; IVIG: intravenous immunoglobulin; SJS: Stevens-Johnson syndrome; TEN: toxic epidermal necrolysis.</title>
            <list list-type="order">
              <list-item>
                <p>Stevens-Johnson syndrome, anaphylactic shock, drug-induced hypersensitivity syndrome, epidermolysis bullosa, AS, EB, TEN, SJS, DIHS</p>
              </list-item>
              <list-item>
                <p>Allergy, hypersensitivity, mucous membrane, erythema, epidermolysis, wheezing, bronchospasm, cyanosis, decreased peak expiratory flow, dystonia, urticaria, angioedema, hypovolemic hypotension, macula, maculopapular, sterile pustules, purpura, confluent, flaccid blister, bulla, exfoliative, scales, Scarlet fever–like, measles, diffuse, mucosal erosion, IVIG</p>
              </list-item>
              <list-item>
                <p>glucocorticoid, adrenaline, prednisolone, prednisone, dexamethasone, methylprednisolone</p>
              </list-item>
            </list>
          </boxed-text>
        </sec>
        <sec>
          <title>Data Set With Selected Text</title>
          <p>An oracle test was conducted to evaluate whether the strategy of key sentence selection affects performance. This oracle test was performed as follows: (1) for each document that contains any keyword, assign its gold tag, and (2) for all the documents that contain no keywords, assign the UNKNOWN tag (for the smoking task) or the NEG tag (for the DHR task).</p>
          <p>As shown in <xref ref-type="table" rid="table3">Table 3</xref>, key sentence selection reduced the maximum word count and the average word count for both data sets of the smoking task. The oracle micro-F1 was 1.0 for both the training and test sets, which meant that the key sentence selection strategy did not affect the overall performance.</p>
          <p>Two lists of keywords were evaluated for the DHR task: TF-IDF keywords and guideline keywords. As shown in <xref ref-type="table" rid="table4">Table 4</xref>, key sentence selection reduced the maximum word count and the average word count for both training and test data sets of the DHR task. The oracle test showed that with TF-IDF keywords, the oracle micro-F1 score was almost 1.0. With guideline keywords, about 2% to 3% of errors in the whole pipeline were introduced by this strategy.</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Statistics on the original and selected text in the smoking task<sup>a</sup>.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="0"/>
              <col width="160"/>
              <col width="300"/>
              <col width="320"/>
              <col width="220"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">
                    <break/>
                  </td>
                  <td>Maximum word count</td>
                  <td>Average word count</td>
                  <td>Oracle micro-F1</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Train</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Original</td>
                  <td>3025</td>
                  <td>766</td>
                  <td>—<sup>b</sup></td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Selected</td>
                  <td>194</td>
                  <td>18</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Test</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Original</td>
                  <td>2529</td>
                  <td>851</td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Selected</td>
                  <td>117</td>
                  <td>18</td>
                  <td>1</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>For word counting, all terms split by space delimiters were considered words.</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>Not applicable.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Statistics on the original and selected text in the severe drug hypersensitivity reaction task<sup>a</sup>.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="0"/>
              <col width="30"/>
              <col width="160"/>
              <col width="0"/>
              <col width="0"/>
              <col width="310"/>
              <col width="0"/>
              <col width="0"/>
              <col width="300"/>
              <col width="0"/>
              <col width="0"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td colspan="5">Keywords</td>
                  <td colspan="3">Maximum character count</td>
                  <td colspan="3">Average character count</td>
                  <td>Oracle micro-F1</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="12">
                    <bold>Train</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="3">Original</td>
                  <td colspan="3">27198</td>
                  <td colspan="3">4615</td>
                  <td colspan="2">—<sup>b</sup></td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="11">
                    <bold>Selected</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>TF-IDF<sup>c</sup></td>
                  <td colspan="3">4681</td>
                  <td colspan="3">770</td>
                  <td colspan="3">0.99</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>Guideline</td>
                  <td colspan="3">1926</td>
                  <td colspan="3">199</td>
                  <td colspan="3">0.98</td>
                </tr>
                <tr valign="top">
                  <td colspan="12">
                    <bold>Test</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="3">Original</td>
                  <td colspan="3">15454</td>
                  <td colspan="3">3963</td>
                  <td colspan="2">—</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="11">
                    <bold>Selected</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>TF-IDF</td>
                  <td colspan="3">3210</td>
                  <td colspan="3">687</td>
                  <td colspan="3">1</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>Guideline</td>
                  <td colspan="3">636</td>
                  <td colspan="3">177</td>
                  <td colspan="3">0.97</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table4fn1">
                <p><sup>a</sup>For the drug hypersensitivity reaction data set, Chinese characters were counted.</p>
              </fn>
              <fn id="table4fn2">
                <p><sup>b</sup>Not applicable.</p>
              </fn>
              <fn id="table4fn3">
                <p><sup>c</sup>TF-IDF: term frequency-inverse document frequency.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Transformers</title>
        <p>In-domain and open-domain pretrained embeddings by contextualized language models were evaluated in this work. For implementation, the SBERT library [<xref ref-type="bibr" rid="ref10">10</xref>] computes document embedding with pretrained open-domain or domain-specific language models. There was no fine-tuning conducted for these pretrained models.</p>
        <p>This work evaluated the open-domain model bert-base-uncased [<xref ref-type="bibr" rid="ref21">21</xref>] and domain-specific models ClinicalBERT and DischargeBERT [<xref ref-type="bibr" rid="ref20">20</xref>] for English clinical notes.</p>
        <p>This work evaluated the open-domain model bert-base-chinese [<xref ref-type="bibr" rid="ref21">21</xref>] and domain-specific model Medbert-kd-chinese [<xref ref-type="bibr" rid="ref22">22</xref>] for Chinese clinical notes.</p>
      </sec>
      <sec>
        <title>Machine Learning Classifiers</title>
        <p>Machine learning classifiers were stacked on top of deep learning transformers. Each machine learning classifier was tuned by 10-fold cross-validation on the training data set. An automatic grid search framework [<xref ref-type="bibr" rid="ref10">10</xref>] searched for optimal hyperparameters. This work evaluated linear models with stochastic gradient descent (SGD) learning and libsvm for support vector classification (SVC).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Smoking Task: Strategies for Long Document Classification</title>
        <sec>
          <title>Document Truncation</title>
          <p>The library SBERT implemented this strategy with pretrained models BERT, ClinicalBERT, and DischargeBERT. As shown in <xref ref-type="table" rid="table5">Table 5</xref>, these models performed poorly. When long documents were straightforwardly fed into the transformers, only the first 512 word pieces were retained.</p>
          <table-wrap position="float" id="table5">
            <label>Table 5</label>
            <caption>
              <p>Phenotyping results (micro-averaged F1) of the smoking task.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="290"/>
              <col width="200"/>
              <col width="250"/>
              <col width="260"/>
              <thead>
                <tr valign="top">
                  <td>Transformer</td>
                  <td>Classifier</td>
                  <td colspan="2">Micro-averaged F1 (%)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>Original text</td>
                  <td>Selected text</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Longformer</td>
                  <td>SGD<sup>a</sup></td>
                  <td>63.37</td>
                  <td>78.22</td>
                </tr>
                <tr valign="top">
                  <td>Bert-base-uncased</td>
                  <td>SGD</td>
                  <td>67.33</td>
                  <td>90.01</td>
                </tr>
                <tr valign="top">
                  <td>DischargeBERT</td>
                  <td>SGD</td>
                  <td>63.37<sup>b</sup></td>
                  <td>91.09</td>
                </tr>
                <tr valign="top">
                  <td>ClinicalBERT</td>
                  <td>SGD</td>
                  <td>60.40</td>
                  <td>94.06</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table5fn1">
                <p><sup>a</sup>SGD: stochastic gradient descent.</p>
              </fn>
              <fn id="table5fn2">
                <p><sup>b</sup>Given the size of the data set, some models may have the same results.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>More Efficient Self-Attention</title>
          <p>The Longformer model uses sparse self-attention instead of full self-attention to process longer documents (up to 4096 tokens). However, as shown in <xref ref-type="table" rid="table5">Table 5</xref>, it did not outperform BERT baselines.</p>
        </sec>
        <sec>
          <title>Key Sentence Selection</title>
          <p>This work used unigrams and bigrams from Pedersen [<xref ref-type="bibr" rid="ref16">16</xref>] to select key sentences. As shown in <xref ref-type="table" rid="table5">Table 5</xref>, each model performed better on the selected text. The domain-specific pretrained language models, ClinicalBERT (94.06%) and DischargeBERT (91.09%), outperformed the open-domain model, bert-base-uncased (90.01%).</p>
        </sec>
        <sec>
          <title>Hierarchy Representation</title>
          <p>In a hierarchy approach, sentence representations are built first and then aggregated into a document-level representation. For a fair comparison, we evaluated and reported the results of previous work [<xref ref-type="bibr" rid="ref6">6</xref>] with our own evaluation script. As shown in <xref ref-type="table" rid="table6">Table 6</xref>, the <italic>f</italic><sub>mean</sub> architecture in [<xref ref-type="bibr" rid="ref6">6</xref>] (94.2%) achieved state-of-the-art performance.</p>
          <p>As shown in <xref ref-type="table" rid="table6">Table 6</xref>, our method (94.1%) achieved comparable performance with the top-performing method. Other earlier work for the smoking task (F1 ranged from 77.0% to 90.0%) did not achieve the same level of performance.</p>
          <p>The strategies of key sentence selection and hierarchy representation achieved comparable performance. Furthermore, their efficiency and memory requirements were compared. As summarized in <xref ref-type="table" rid="table7">Table 7</xref>, GPU was not required for training machine learning classifiers in the proposed pipeline. The hierarchy representation model required a Tesla M40 GPU (Nvidia Corp) to train for 1 day. Our method was about 9 times faster than the hierarchy representation model for inference. With the strategies of both document truncation and key sentence selection, only 1 block was processed by the transformer models for each document, so the inference time was not reduced by key sentence selection.</p>
          <table-wrap position="float" id="table6">
            <label>Table 6</label>
            <caption>
              <p>Phenotyping results (micro-averaged F1) of our methods and previous work<sup>a</sup> of the smoking task.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="630"/>
              <col width="370"/>
              <thead>
                <tr valign="top">
                  <td>Transformer</td>
                  <td>Micro-averaged F1 (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>ClinicalBERT (ours)</td>
                  <td>94.1</td>
                </tr>
                <tr valign="top">
                  <td><italic>f</italic><sub>mean</sub> [<xref ref-type="bibr" rid="ref6">6</xref>]</td>
                  <td>94.2</td>
                </tr>
                <tr valign="top">
                  <td>Shared task 1st place [<xref ref-type="bibr" rid="ref23">23</xref>]</td>
                  <td>90.0</td>
                </tr>
                <tr valign="top">
                  <td>Majority label baseline [<xref ref-type="bibr" rid="ref6">6</xref>]</td>
                  <td>81.0</td>
                </tr>
                <tr valign="top">
                  <td>CNN<sup>b</sup> [<xref ref-type="bibr" rid="ref24">24</xref>]</td>
                  <td>77.0</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table6fn1">
                <p><sup>a</sup>Our method and <italic>f</italic><sub>mean</sub> were evaluated by the same script over the test data set. Other results were found directly from their published reports. For comparison, the precision of the results is 0.1%.</p>
              </fn>
              <fn id="table6fn2">
                <p><sup>b</sup>CNN: convolutional neural networks.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <table-wrap position="float" id="table7">
            <label>Table 7</label>
            <caption>
              <p>Runtime and memory requirements of each model. The training time and GPU requirement of <italic>f</italic><sub>mean</sub> are taken from previous work [<xref ref-type="bibr" rid="ref6">6</xref>]. The inference time on the test data set was evaluated on a GPU server with an NVIDIA T4 GPU (Nvidia Corp) and 4 CPUs.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="190"/>
              <col width="160"/>
              <col width="280"/>
              <col width="190"/>
              <col width="180"/>
              <thead>
                <tr valign="top">
                  <td>Model</td>
                  <td>Documents</td>
                  <td>Inference time on test data set (seconds)</td>
                  <td>Training time (hours)</td>
                  <td>GPU memory</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td><italic>f</italic><sub>mean</sub> [<xref ref-type="bibr" rid="ref6">6</xref>]</td>
                  <td>text</td>
                  <td>35.52</td>
                  <td>24</td>
                  <td>16</td>
                </tr>
                <tr valign="top">
                  <td>ClinicalBERT</td>
                  <td>text</td>
                  <td>0.46</td>
                  <td>—<sup>a</sup></td>
                  <td>—</td>
                </tr>
                <tr valign="top">
                  <td>+MLClassifier</td>
                  <td>selected text</td>
                  <td>0.437</td>
                  <td>1</td>
                  <td>—</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table7fn1">
                <p><sup>a</sup>Not applicable.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Severe DHR Task: Stacked Transformers and Classifiers</title>
        <p>The smoking task showed that key sentence selection improved self-attention transformers with length limits. In the DHR task, this strategy was evaluated with various transformers and classifiers. As discussed in Methods, 2 kinds of keywords were evaluated and compared. As an unsupervised method, top TF-IDF [<xref ref-type="bibr" rid="ref8">8</xref>] feature words were used for key sentence selection. Considering that clinical notes comply with guidelines, keywords were drawn from the DHR guidelines.</p>
        <p>As shown in <xref ref-type="table" rid="table8">Table 8</xref>, the guideline keywords always improved the performance, regardless of the stacked transformers and classifiers. The TF-IDF keywords only helped with the SVC.</p>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Phenotyping results (micro-averaged F1) of different transformers for the severe drug hypersensitivity reaction task.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="0"/>
            <col width="350"/>
            <col width="0"/>
            <col width="220"/>
            <col width="0"/>
            <col width="240"/>
            <col width="0"/>
            <col width="190"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Transformers and classifiers</td>
                <td colspan="5">Micro-averaged F1 (%)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Original text</td>
                <td colspan="3">Selected text</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">TF-IDF<sup>a</sup></td>
                <td>Guideline</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Bert-base-chinese</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVC<sup>b</sup></td>
                <td colspan="2">80.91</td>
                <td colspan="2">82.73</td>
                <td colspan="2">87.27</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SGD<sup>c</sup></td>
                <td colspan="2">80.00</td>
                <td colspan="2">77.27</td>
                <td colspan="2">86.36</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Medbert-kd-chinese</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVC</td>
                <td colspan="2">81.82</td>
                <td colspan="2">83.64</td>
                <td colspan="2">89.09</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SGD</td>
                <td colspan="2">82.73</td>
                <td colspan="2">73.64</td>
                <td colspan="2">87.27</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table8fn1">
              <p><sup>a</sup>TF-IDF: term frequency-inverse document frequency.</p>
            </fn>
            <fn id="table8fn2">
              <p><sup>b</sup>SVC: support vector classification.</p>
            </fn>
            <fn id="table8fn3">
              <p><sup>c</sup>SGD: stochastic gradient descent.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Applications in a 9-Year EHR</title>
        <p>Finally, the best configuration was applied to the 9 years of data in Beijing Children’s Hospital’s EHRs. A total of 1155 cases were alerted. After double-checking by 2 clinicians and 2 pharmacists in pediatrics based on the criterion of severe DHRs, 357 cases of severe DHRs in children were found (<xref ref-type="table" rid="table9">Table 9</xref>): anaphylactic shock (n=39), drug-induced hypersensitivity syndrome (n=178), Stevens-Johnson syndrome (n=86), and epidermolysis bullosa (n=54). Only 36 of 357 severe DHRs had been reported to SRS before. About 89.92% of cases were underreported, resulting in insufficient attention from drug regulators and clinicians. This suggests that our method could actively identify severe DHRs, providing additional evidence for pharmacovigilance in children.</p>
        <p>The case analysis indicated many suspected drugs that may cause severe DHRs in pediatrics. The suspected drugs leading to anaphylactic shock mainly included pegaspargase injection, L-asparaginase, cefoperazone sulbactam, etc. Phenobarbital, nimesulide, and cephalosporin antibiotics were the key suspected drugs leading to drug-induced hypersensitivity syndrome and Stevens-Johnson syndrome. In addition, lamotrigine, lysine acetylsalicylate, and meropenem were closely related to the occurrence of epidermolysis bullosa.</p>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Distribution of the severe drug hypersensitivity reactions cases in 9 years of electronic health records found by the proposed pipeline.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="210"/>
            <col width="200"/>
            <col width="230"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Severe DHR<sup>a</sup></td>
                <td>Reported in SRS<sup>b</sup> of BCH<sup>c</sup>, n</td>
                <td colspan="3">DHR cases confirmed by experts, n</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Diagnosed in BCH</td>
                <td>Diagnosed in other hospitals</td>
                <td>Total</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>AS<sup>d</sup></td>
                <td>4</td>
                <td>26</td>
                <td>13</td>
                <td>39</td>
              </tr>
              <tr valign="top">
                <td>DIHS<sup>e</sup></td>
                <td>16</td>
                <td>29</td>
                <td>149</td>
                <td>178</td>
              </tr>
              <tr valign="top">
                <td>SJS<sup>f</sup></td>
                <td>7</td>
                <td>9</td>
                <td>77</td>
                <td>86</td>
              </tr>
              <tr valign="top">
                <td>EB<sup>g</sup></td>
                <td>9</td>
                <td>8</td>
                <td>46</td>
                <td>54</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>36</td>
                <td>72</td>
                <td>285</td>
                <td>357</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table9fn1">
              <p><sup>a</sup>DHR: drug hypersensitivity reaction.</p>
            </fn>
            <fn id="table9fn2">
              <p><sup>b</sup>SRS: spontaneous reporting system.</p>
            </fn>
            <fn id="table9fn3">
              <p><sup>c</sup>BCH: Beijing Children’s Hospital.</p>
            </fn>
            <fn id="table9fn4">
              <p><sup>d</sup>AS: anaphylactic shock.</p>
            </fn>
            <fn id="table9fn5">
              <p><sup>e</sup>DIHS: drug-induced hypersensitivity syndrome.</p>
            </fn>
            <fn id="table9fn6">
              <p><sup>f</sup>SJS: Stevens-Johnson syndrome.</p>
            </fn>
            <fn id="table9fn7">
              <p><sup>g</sup>EB: epidermolysis bullosa.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The results showed that clinical documents were too long for the document classification baselines to process effectively. Among the 4 strategies of long document classification, hierarchy representation and key sentence selection performed best on the smoking task. Moreover, key sentence selection was 9 times faster than hierarchy representation models for inference. The keywords extracted from task-specific guidelines performed better than the unsupervised method. Domain-specific language models always performed better than general embeddings.</p>
        <p>A total of 1155 cases were alerted, among which clinicians and pharmacists identified 357 cases of severe DHRs in children. Only 36 of these cases had been reported to the SRS. This result suggested that the reporting rate of SRS may be as low as 10.08%. The automatic pipeline that scrutinized clinical notes and reported potential severe DHR cases can help decrease the number of missed positive DHR cases and reduce the cost of labor at the same time.</p>
        <p>The case analysis also found more suspected drugs associated with severe DHRs in pediatrics. The analysis could help promote postmarketing drug risk assessment conducive to rational drug use and improve drug guidelines.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Our method achieved comparable performance for the smoking task with the top-performing method (94.1% vs 94.2%). For the DHR task, our method discovered 357 positive cases, about 90% of which were missed by SRS.</p>
        <p>Recent work has shown that clinical documents are too long for contextualized language models to process [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. Unlike previous works that only process discharge summaries [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>], this DHR task deals with documents consisting of all clinical notes associated with 1 inpatient visit. The average length of discharge summaries is typically hundreds of words. However, in the DHR data set, the average document length is up to several thousand Chinese characters, and some documents contain tens of thousands of Chinese characters.</p>
        <p>This work evaluated and compared 4 strategies: document truncation [<xref ref-type="bibr" rid="ref10">10</xref>], hierarchy representation [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], more efficient self-attention [<xref ref-type="bibr" rid="ref12">12</xref>], and key sentence selection [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. None of these works considered the use of guidelines.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The proposed method required the annotation of about 200 positive cases for supervised training. When applied to the large archive of EHRs in hospital databases, certain preprocessing steps are still required to prevent malfunctions from badly formatted documents. Such preprocessing steps may vary for each hospital’s system.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Automatic identification of severe DHRs can be approached as a document classification problem. The best strategy for long document classification of clinical notes is key sentence selection with task-specific guidelines. The reporting of DHR cases cannot rely only on the clinicians in charge. Over the same period of data, the SRS reported 36 cases, whereas the automatic process discovered 357 cases. The case analysis also found more suspected drugs associated with severe DHRs in pediatrics.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Types of drug hypersensitivity reactions and criteria.</p>
        <media xlink:href="medinform_v10i9e37812_app1.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DHR</term>
          <def>
            <p>drug hypersensitivity reaction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">NEG</term>
          <def>
            <p>negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">SGD</term>
          <def>
            <p>stochastic gradient descent</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">SRS</term>
          <def>
            <p>spontaneous reporting system</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SVC</term>
          <def>
            <p>support vector classification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">TF-IDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by grant CST2020CT108 from the Clinical Toxicology Program of Chinese Society of Toxicology, grant DSM2021004 from the Post-marketing Drug Risk Assessment Program of China Society for Drug Regulation, and grant CNHDRC-KJ-W-2021-58 from the Clinical Technology Training Program for Comprehensive Evaluation of Pediatric Medication of China National Health and Development Research Center. The funder had no role in conducting the study; collection, management, analysis, and interpretation of data; preparation, review, and approval of the manuscript; or decision to submit the manuscript for publication.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>XLW undertook the framework design and provided overall guidance for the whole research. YCY, XCW, WC, YML, and YFX took responsibility for the data collection. YCY and QYZ performed the data processing and article writing. QYZ and XLW provided data interpretation and methodological advice.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naisbitt</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>Drug hypersensitivity reactions in skin: understanding mechanisms and the development of diagnostic and predictive tests</article-title>
          <source>Toxicology</source>
          <year>2004</year>
          <month>01</month>
          <day>15</day>
          <volume>194</volume>
          <issue>3</issue>
          <fpage>179</fpage>
          <lpage>196</lpage>
          <pub-id pub-id-type="doi">10.1016/j.tox.2003.09.004</pub-id>
          <pub-id pub-id-type="medline">14687965</pub-id>
          <pub-id pub-id-type="pii">S0300483X03004165</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gomes</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Brockow</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kuyucu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Saretta</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mori</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Blanca-Lopez</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Atanaskovic-Markovic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kidon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Caubet</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Terreehorst</surname>
              <given-names>I</given-names>
            </name>
            <collab>ENDA/EAACI Drug Allergy Interest Group</collab>
          </person-group>
          <article-title>Drug hypersensitivity in children: report from the pediatric task force of the EAACI Drug Allergy Interest Group</article-title>
          <source>Allergy</source>
          <year>2016</year>
          <month>02</month>
          <volume>71</volume>
          <issue>2</issue>
          <fpage>149</fpage>
          <lpage>161</lpage>
          <pub-id pub-id-type="doi">10.1111/all.12774</pub-id>
          <pub-id pub-id-type="medline">26416157</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rukasin</surname>
              <given-names>CRF</given-names>
            </name>
            <name name-style="western">
              <surname>Norton</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Broyles</surname>
              <given-names>AD</given-names>
            </name>
          </person-group>
          <article-title>Pediatric Drug Hypersensitivity</article-title>
          <source>Curr Allergy Asthma Rep</source>
          <year>2019</year>
          <month>02</month>
          <day>22</day>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>11</fpage>
          <pub-id pub-id-type="doi">10.1007/s11882-019-0841-y</pub-id>
          <pub-id pub-id-type="medline">30793223</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11882-019-0841-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lopez-Gonzalez</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Herdeiro</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Figueiras</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Determinants of under-reporting of adverse drug reactions: a systematic review</article-title>
          <source>Drug Saf</source>
          <year>2009</year>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.2165/00002018-200932010-00002</pub-id>
          <pub-id pub-id-type="medline">19132802</pub-id>
          <pub-id pub-id-type="pii">2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Identifying patient smoking status from medical discharge records</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2008</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>14</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=17947624"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M2408</pub-id>
          <pub-id pub-id-type="medline">17947624</pub-id>
          <pub-id pub-id-type="pii">M2408</pub-id>
          <pub-id pub-id-type="pmcid">PMC2274873</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mulyar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schumacher</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rouhizadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Phenotyping of clinical notes with improved document classification models using contextualized neural language models</article-title>
          <source>ArXiv. Preprint posted online on October 30, 2019</source>
          <year>2019</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1910.13664"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Garapati</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rich</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>An interpretable end-to-end fine-tuning approach for long clinical text</article-title>
          <source>ArXiv. Preprint posted online on November 12, 2020</source>
          <year>2020</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2011.06504"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valmianski</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Goodwin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Finn</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Evaluating robustness of language models for chief complaint extraction from patient-generated text</article-title>
          <source>ArXiv. Preprint posted online on November 15, 2019</source>
          <year>2019</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1911.06915"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="book">
          <source>Guidelines for Medical Nomenclature Use of Adverse Drug Reactions</source>
          <year>2016</year>
          <publisher-loc>Beijing</publisher-loc>
          <publisher-name>National Medical Products Administration</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reimers</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks</article-title>
          <source>ArXiv. Preprint posted online on August 27, 2019</source>
          <year>2019</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1908.10084"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1410</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pappagari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Żelasko</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Villalba</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carmiel</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dehak</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Hierarchical transformers for long document classification</article-title>
          <source>ArXiv. Preprint posted online on October 23, 2019</source>
          <year>2019</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1910.10781"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/asru46091.2019.9003958</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beltagy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Longformer: the long-document transformer</article-title>
          <source>ArXiv. Preprint posted online on April 10, 2020</source>
          <year>2020</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2004.05150"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>CogLTX: applying BERT to long texts</article-title>
          <source>Adv Neural Inf Process Syst 33</source>
          <access-date>2022-08-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2020/file/96671501524948bc3937b4b30d0e57b9-Paper.pdf">https://proceedings.neurips.cc/paper/2020/file/96671501524948bc3937b4b30d0e57b9-Paper.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fiok</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Karwowski</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gutierrez-Franco</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Davahli</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Wilamowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ahram</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Juaid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zurada</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Text guide: improving the quality of long text classification by a text selection method based on feature importance</article-title>
          <source>IEEE Access</source>
          <year>2021</year>
          <volume>9</volume>
          <fpage>105439</fpage>
          <lpage>105450</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2021.3099758</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Vyas</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Efficient classification of long documents using transformers</article-title>
          <source>ArXiv. Preprint posted online on March 21, 2022</source>
          <year>2022</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2203.11258"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedersen</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <source>Determining smoker status using supervised and unsupervised learning with lexical features</source>
          <access-date>2022-08-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.116.1948&#38;rep=rep1&#38;type=pdf">https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.116.1948&#38;rep=rep1&#38;type=pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Recommendations in guideline for emergency management of anaphylaxis</article-title>
          <source>Adverse Drug React J</source>
          <year>2019</year>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>85</fpage>
          <lpage>91</lpage>
          <pub-id pub-id-type="doi">10.1201/9780429083129-12</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Allergic Diseases Committee</collab>
          </person-group>
          <article-title>Expert consensus on diagnosis and treatment of drug hypersensitivity syndrome</article-title>
          <source>Chin J Dermatol</source>
          <year>2018</year>
          <volume>51</volume>
          <issue>11</issue>
          <fpage>787</fpage>
          <lpage>790</lpage>
          <pub-id pub-id-type="doi">10.3760/cma.j.issn.0412-4030.2018.11.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Adverse Drug Reaction Research Center of Chinese Society of Dermatology</collab>
          </person-group>
          <article-title>Expert consensus on the diagnosis and treatment of Stevens-Johnson syndrome/toxic epidermal necrolysis</article-title>
          <source>Chin J Dermatol</source>
          <year>2021</year>
          <month>05</month>
          <day>15</day>
          <volume>54</volume>
          <issue>5</issue>
          <fpage>376</fpage>
          <lpage>381</lpage>
          <pub-id pub-id-type="doi">10.35541/cjd.20201177</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Boag</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Publicly Available Clinical BERT Embeddings</article-title>
          <source>ArXiv. Preprint posted online on April 6, 2019</source>
          <year>2019</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1904.03323"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/w19-1909</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Turc</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Well-read students learn better: on the importance of pre-training compact models</article-title>
          <source>ArXiv. Preprint posted online on August 23, 2019</source>
          <year>2019</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1908.08962"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <source>trueto: research and application of BERT model in Chinese clinical Natural language processing</source>
          <year>2021</year>
          <access-date>2021-03-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/trueto/medbert">https://github.com/trueto/medbert</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Good</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jezierny</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Macpherson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chajewska</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Identifying smokers with a medical extraction system</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2008</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>36</fpage>
          <lpage>39</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17947619"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M2442</pub-id>
          <pub-id pub-id-type="medline">17947619</pub-id>
          <pub-id pub-id-type="pii">M2442</pub-id>
          <pub-id pub-id-type="pmcid">PMC2274874</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Atkinson</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Amin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A clinical text classification paradigm using weak supervision and deep representation</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>01</month>
          <day>07</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-018-0723-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-018-0723-6</pub-id>
          <pub-id pub-id-type="medline">30616584</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-018-0723-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6322223</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
