<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i12e23357</article-id>
      <article-id pub-id-type="pmid">33372664</article-id>
      <article-id pub-id-type="doi">10.2196/23357</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using Character-Level and Entity-Level Representations to Enhance Bidirectional Encoder Representation From Transformers-Based Clinical Semantic Textual Similarity Model: ClinicalSTS Modeling Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Wang</surname>
            <given-names>Yanshan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yang</surname>
            <given-names>Xi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Manzanares</surname>
            <given-names>Maria</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Memon</surname>
            <given-names>Muhammad</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Xiong</surname>
            <given-names>Ying</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7423-2937</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Shuai</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5739-9022</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Qingcai</given-names>
          </name>
          <degrees>PhD, Prof Dr</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8473-7293</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Yan</surname>
            <given-names>Jun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2497-5518</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Tang</surname>
            <given-names>Buzhou</given-names>
          </name>
          <degrees>PhD, Prof Dr</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Harbin Institute of Technology</institution>
            <addr-line>HIT Campus, Xili University Town</addr-line>
            <addr-line>Shenzhen, 518055</addr-line>
            <country>China</country>
            <phone>86 075526033182</phone>
            <email>tangbuzhou@gmail.com</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0271-8246</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Harbin Institute of Technology</institution>
        <addr-line>Shenzhen</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Peng Cheng Laboratory</institution>
        <addr-line>Shenzhen</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Yidu Cloud Technology Company Limited</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Buzhou Tang <email>tangbuzhou@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>12</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>29</day>
        <month>12</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>12</issue>
      <elocation-id>e23357</elocation-id>
      <history>
        <date date-type="received">
          <day>10</day>
          <month>8</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>22</day>
          <month>9</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>10</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>16</day>
          <month>11</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Ying Xiong, Shuai Chen, Qingcai Chen, Jun Yan, Buzhou Tang. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 29.12.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/12/e23357/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>With the popularity of electronic health records (EHRs), the quality of health care has been improved. However, there are also some problems caused by EHRs, such as the growing use of copy-and-paste and templates, resulting in EHRs of low quality in content. In order to minimize data redundancy in different documents, Harvard Medical School and Mayo Clinic organized a national natural language processing (NLP) clinical challenge (n2c2) on clinical semantic textual similarity (ClinicalSTS) in 2019. The task of this challenge is to compute the semantic similarity among clinical text snippets.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>In this study, we aim to investigate novel methods to model ClinicalSTS and analyze the results.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We propose a semantically enhanced text matching model for the 2019 n2c2/Open Health NLP (OHNLP) challenge on ClinicalSTS. The model includes 3 representation modules to encode clinical text snippet pairs at different levels: (1) character-level representation module based on convolutional neural network (CNN) to tackle the out-of-vocabulary problem in NLP; (2) sentence-level representation module that adopts a pretrained language model bidirectional encoder representation from transformers (BERT) to encode clinical text snippet pairs; and (3) entity-level representation module to model clinical entity information in clinical text snippets. In the case of entity-level representation, we compare 2 methods. One encodes entities by the entity-type label sequence corresponding to text snippet (called entity I), whereas the other encodes entities by their representation in MeSH, a knowledge graph in the medical domain (called entity II).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We conduct experiments on the ClinicalSTS corpus of the 2019 n2c2/OHNLP challenge for model performance evaluation. The model only using BERT for text snippet pair encoding achieved a Pearson correlation coefficient (PCC) of 0.848. When character-level representation and entity-level representation are individually added into our model, the PCC increased to 0.857 and 0.854 (entity I)/0.859 (entity II), respectively. When both character-level representation and entity-level representation are added into our model, the PCC further increased to 0.861 (entity I) and 0.868 (entity II).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Experimental results show that both character-level information and entity-level information can effectively enhance the BERT-based STS model.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>deep learning</kwd>
        <kwd>clinical semantic textual similarity</kwd>
        <kwd>knowledge graph</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Electronic health record (EHR) systems have been widely used in hospitals all over the world for convenience to health information storage, share, and exchange [<xref ref-type="bibr" rid="ref1">1</xref>]. In recent years, EHRs have become a key data source for medical research and clinical decision support. Therefore, the quality of EHRs is crucial. However, copy-and-paste and templates are very common in EHR writing [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>], resulting in EHRs of low quality in content. How to detect copy-and-paste and templates in different documents has become increasingly important for the secondary use of EHRs. This can be regarded as a clinical semantic textual similarity (ClinicalSTS) task, which is also applied to clinical decision support, trial recruitment, tailored care, clinical research [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>], and medical information services, such as clinical question answering [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] and document classification [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>In the past few years, some shared tasks on STS, such as Semantic Evaluation (SemEval), have been launched by different organizers [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. These shared tasks mainly focus on general domains, including newswire, tutorial dialog system, Wikipedia, among others. There has been almost no study on STS in the clinical domain. To boost the development of ClinicalSTS, Wang et al [<xref ref-type="bibr" rid="ref15">15</xref>] constructed a clinical STS corpus of 174,629 clinical text snippet pairs from Mayo Clinic. Based on a part of this corpus, BioCreative/OHNLP organizers held the first ClinicalSTS shared pilot task (challenge) in 2018 [<xref ref-type="bibr" rid="ref16">16</xref>]. A corpus of 1068 clinical text snippet pairs with similarity ranging from 0 to 5 was provided for this shared task. In 2019, the n2c2/OHNLP organizers extended the 2018 shared task corpus and continued to hold ClinicalSTS shared task [<xref ref-type="bibr" rid="ref17">17</xref>]. The extended corpus is composed of 2055 clinical text snippet pairs.</p>
        <p>In this paper, we introduce our system developed for the 2019 n2c2/OHNLP shared task on ClinicalSTS. The system is based on bidirectional encoder representation from transformers (BERT) [<xref ref-type="bibr" rid="ref18">18</xref>] and includes the 2 other types of representations besides BERT: (1) character-level representation to tackle the out-of-vocabulary (OOV) problem in natural language processing (NLP) and (2) entity-level representation to model clinical entity information in clinical text snippets. In the case of entity-level representation, we apply 2 entity-level representations: one encodes entities in a text snippet by the corresponding entity label sequence (called entity I) and the other one encodes entities with their representation in MeSH [<xref ref-type="bibr" rid="ref19">19</xref>] (called entity II). Our system achieves the highest Pearson correlation coefficient (PCC) of 0.868 on the corpus of the 2019 n2c2/OHNLP track on ClinicalSTS, which is competitive with other state-of-the-art systems.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>A model for STS usually consists of 2 modules: a module to encode text snippet (or sentence) pairs and a module for prediction (classification or regression). According to sentence pair encoding, STS models can be classified into the following 2 categories: sentence encoding models and sentence pair interaction models. The sentence encoding models first use Siamese neural network to individually encode 2 sentences with 2 neural networks of the same structure and shared parameters [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref23">23</xref>], then combine the 2 sentences’ representation through concatenation, element-wise product, or element-wise difference operations, and finally make a classification or regression prediction via a specific layer such as multilayer perceptron (MLP) [<xref ref-type="bibr" rid="ref24">24</xref>]. The main limitation of the sentence pair encoding models is that they ignore word-level interactions. The sentence pair interaction models adopt matching-aggregation architectures to encode word-level interactions [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. These models first build an interaction matrix and then use a convolutional neural network (CNN) [<xref ref-type="bibr" rid="ref27">27</xref>] and long short-term memory [<xref ref-type="bibr" rid="ref28">28</xref>] with attention mechanism [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>] and hierarchical architecture [<xref ref-type="bibr" rid="ref31">31</xref>] to obtain aggregated matching representation for final prediction.</p>
        <p>In recent years, pretrained language models good at capturing sentence-level semantic information, such as BERT [<xref ref-type="bibr" rid="ref18">18</xref>], XLNet [<xref ref-type="bibr" rid="ref32">32</xref>], RoBERTa [<xref ref-type="bibr" rid="ref33">33</xref>], have been proved to significantly improve downstream tasks. However, most pretrained language models are at the token level. In order to tackle the inherent OOV problem of NLP, character-level representation is also considered in various NLP tasks, such as named entity recognition [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>] and entity normalization [<xref ref-type="bibr" rid="ref37">37</xref>], and brings improvements. Besides, researchers have started investigating how to use entity-level representation in NLP tasks [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Set</title>
        <p>The n2c2/OHNLP organizers manually annotated a total of 2055 clinical text snippet pairs by 2 medical experts for the ClinicalSTS task, where 1643 pairs are used as the training set and 412 as the test set. The similarity of each clinical text snippet pair is measured by a score ranging from 0 to 5, where 0 means that 2 clinical text snippets are absolutely different, and 5 means that 2 clinical text snippets are entirely semantically equal. All clinical text snippets are selected from deidentified EHRs. <xref ref-type="table" rid="table1">Table 1</xref> gives examples of each score.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Examples of ClinicalSTS<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="30"/>
            <col width="870"/>
            <thead>
              <tr valign="top">
                <td>Score</td>
                <td colspan="2">Example of clinical text snippet pair</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td colspan="2">
                  <bold>The 2 sentences are completely dissimilar</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td rowspan="2">
                  <break/>
                </td>
                <td>S1: The patient has missed 0 hours of work in the past seven days for issues not related to depression.</td>
              </tr>
              <tr valign="top">
                <td>S2: In the past year the patient has the following number of visits: none in the hospital none in the er and one as an outpatient.</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td colspan="2">
                  <bold>The 2 sentences are not equivalent but have the same topic</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>S1: There is no lower extremity edema present bilaterally.</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>S2: There is a 2+ radial pulse present in the upper extremities bilaterally.</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td colspan="2">
                  <bold>The 2 sentences are not equivalent but share some details</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td rowspan="2">
                  <break/>
                </td>
                <td>S1: I met with the charge nurse and reviewed the patient's clinical condition.</td>
              </tr>
              <tr valign="top">
                <td>S2: I have reviewed the relevant imaging and medical record.</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td colspan="2">
                  <bold>The 2 sentences are roughly equivalent but some important information differs</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td rowspan="2">
                  <break/>
                </td>
                <td>S1: I explained the diagnosis and treatment plan in detail, and the patient clearly expressed understanding of the content reviewed.</td>
              </tr>
              <tr valign="top">
                <td>S2: Began discussion of diagnosis and treatment of chronic pain and chronic fatigue; patient expressed understanding of the content.</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td colspan="2">
                  <bold>The 2 sentences are mostly equivalent and only a little detail is different</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td rowspan="2">
                  <break/>
                </td>
                <td>S1: Albuterol [PROVENTIL/VENTOLIN] 90 mcg/Act HFA Aerosol 2 puffs by inhalation every 4 hours as needed.</td>
              </tr>
              <tr valign="top">
                <td>S2: Albuterol [PROVENTIL/VENTOLIN] 90 mcg/Act HFA Aerosol 1-2 puffs by inhalation every 4 hours as needed #1 each.</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td colspan="2">
                  <bold>The 2 sentences mean the same thing, they are absolutely equivalent</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td rowspan="2">
                  <break/>
                </td>
                <td>S1: Goals/Outcomes: Patient will be instructed in a home program, demonstrate understanding, and state the ability to continue independently.</td>
              </tr>
              <tr valign="top">
                <td>S2: Patient will be instructed in home program, demonstrate understanding, and state ability to continue independently-ongoing.</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>ClinicalSTS: clinical semantic textual similarity.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Models</title>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> presents an overview architecture of our model. In this model, we first use 3 representation modules at different levels to encode input text snippet pairs, that is, character-level, sentence-level, and entity-level representation modules, and then feed them to MLP for prediction.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview architecture of our model for the ClinicalSTS track of the 2019 n2c2/OHNLP challenge. BERT: bidirectional encoder representation from transformers; ClinicalSTS: clinical semantic textual similarity; CNN: convolutional neural network; MLP: multilayer perceptron; PCC: Pearson correlation coefficient; [CLS]: the representation of sentence pair with BERT.</p>
          </caption>
          <graphic xlink:href="medinform_v8i12e23357_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Character-Level Representation</title>
          <p>In order to tackle the OOV problem in NLP, following [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref37">37</xref>], given a pair of clinical text snippets (a, b), we first apply character-level CNN on each token to obtain its character-level representation, and then apply max pooling operation on all tokens in a and b to obtain the character-level representation of (a, b), denoted by C. We model the character-level representation with CNN, because there is no significant difference in using CNN and long short-term memory, according to previous studies [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>].</p>
        </sec>
        <sec>
          <title>Sentence-Level Representation</title>
          <p>We use BERT to encode the input clinical text snippet pair (a, b) and obtain its sentence-level representation, denoted by S = BERT(a, b).</p>
        </sec>
        <sec>
          <title>Entity-Level Representation</title>
          <p>We first deploy cTAKES [<xref ref-type="bibr" rid="ref42">42</xref>], a popular clinical NLP tool, to extract entity mentions from text snippets, and then propose 2 methods to obtain the entity-level representations of the text snippets according to the extracted entity mentions, as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. cTAKES can extract 9 kinds of entities: AnatomicalSiteMention, DiseaseDisorderMention, FractionAnnotation, MedicationMention, Predicate, ProcedureMention, RomanNumeralAnnotation, SignSymptomMention, and Temporal Information.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Entity-level representation.</p>
            </caption>
            <graphic xlink:href="medinform_v8i12e23357_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>In the first method for entity-level representation (entity I), we convert text snippet a and b into entity-type sequences corresponding to them, and then deploy attention-based CNN [<xref ref-type="bibr" rid="ref27">27</xref>] on the pair of the entity-type sequences in the following way:</p>
          <disp-formula>E = BCNN(es<sub>a</sub>, es<sub>b</sub>) (1)</disp-formula>
          <p>where es<sub>a</sub> is the entity label sequence of text snippet a, es<sub>b</sub> is the entity label sequence of text snippet b, BCNN is basic bi-CNN, and E is the entity-level representation of (es<sub>a</sub>, es<sub>b</sub>). For example, given a text snippet b “Zocor 40 mg tablet 1 tablet by mouth one time daily.” shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, cTAKES first extracts 3 medication mentions {“Zocor”, “tablet”, “tablet”} and 1 anatomical mention {“mouth”}, and then we obtain the entity-type sequence corresponding to text snippet b: “MedicationMention O O MedicationMention O MedicationMention O AnatomicalSiteMention O O O O”. In this entity-type sequence, “O” stands for “Other.”</p>
          <p>The second method for entity-level representation (entity II) first directly adopts entity representation learned by TransE [<xref ref-type="bibr" rid="ref43">43</xref>] on an external knowledge graph (KG; MeSH in this study), and then applies average pooling operation on all entities individually in sentences a and b to get entity-level representations of a (denoted by eg<sub>a</sub>) and b (denoted by eg<sub>b</sub>) respectively, and finally aggregates their representations using equation 2.</p>
          <disp-formula>E = tanh (W<sub>e</sub>[eg<sub>a</sub> – eg<sub>b</sub>; eg<sub>a</sub> * eg<sub>b</sub>] + b<sub>e</sub>) (2)</disp-formula>
          <p>where “[;]” denotes concatenation operation, W<sub>e</sub> is a weight matrix, and b<sub>e</sub> is a bias vector.</p>
        </sec>
        <sec>
          <title>MLP Layer</title>
          <p>To aggregate the information of 3 modules, we concatenate them together:</p>
          <disp-formula>f = [S; C; E] (3)</disp-formula>
          <p>Then, we use MLP (as shown in equation 4) to predict the STS score p<sub>score</sub> of (a, b) as follows:</p>
          <disp-formula>p<sub>score</sub> = MLP(Wf + b) (4)</disp-formula>
          <p>where W is a weight matrix, and b is a bias vector.</p>
          <p>The loss function used in our model is the mean square error (MSE) function:</p>
          <disp-formula>Loss = MSE(p<sub>score</sub> – g<sub>score</sub>) (5)</disp-formula>
          <p>where g<sub>score</sub> is the gold-standard score.</p>
        </sec>
        <sec>
          <title>Experimental Setting</title>
          <p>Before conducting experiments, we preprocess the corpus using the following simple rules: (1) convert clinical text snippets into lowercase; (2) tokenize clinical text snippets using special symbols, such as “[”, “]”, “/”, “,”, and “.”, and keep them unchanged in some situations such as “.” in decimals. The hyperparameters of our model are shown in <xref ref-type="table" rid="table2">Table 2</xref>. Other parameters are optimized via fivefold cross validation on the training set. The pretrained BERT model used for text snippet pair representation in our experiments is [BERT-Base, Uncased] [<xref ref-type="bibr" rid="ref44">44</xref>]. We train all model parameters simultaneously, set epochs as 12, and save the last checkpoints as the final models. The performance of all models is measured by PCC.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Hyperparameters setting of our model.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="500"/>
              <col width="500"/>
              <thead>
                <tr valign="top">
                  <td>Parameters</td>
                  <td>Value</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Learning rate</td>
                  <td>2 × 10<sup>–5</sup></td>
                </tr>
                <tr valign="top">
                  <td>Sequence length of BERT<sup>a</sup></td>
                  <td>380</td>
                </tr>
                <tr valign="top">
                  <td>Epochs</td>
                  <td>12</td>
                </tr>
                <tr valign="top">
                  <td>Batch size</td>
                  <td>20</td>
                </tr>
                <tr valign="top">
                  <td>Knowledge graph embedding dimension d</td>
                  <td>100</td>
                </tr>
                <tr valign="top">
                  <td>Character-level kernel size</td>
                  <td>3</td>
                </tr>
                <tr valign="top">
                  <td>Convolution kernels of BCNN<sup>b</sup></td>
                  <td>50</td>
                </tr>
                <tr valign="top">
                  <td>Kernel size of BCNN</td>
                  <td>3</td>
                </tr>
                <tr valign="top">
                  <td>Word embedding dimension of entity I</td>
                  <td>50</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>BERT: bidirectional encoder representation from transformers.</p>
              </fn>
              <fn id="table2fn2">
                <p><sup>b</sup>BCNN: Basic bi-CNN.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table3">Table 3</xref> shows the overall results of our proposed model. Our model achieves the highest PCC of 0.868, which is competitive with other state-of-the-art models proposed for the 2019 n2c2/OHNLP track on ClinicalSTS. The model using entity II is better than that using entity I by 0.007 in PCC, indicating that entity II is a better supplement to BERT than entity I. When character-level representation is removed, the PCC of our model decreases to 0.859 (entity I) and 0.854 (entity II). When entity-level representation is removed, the PCC of our model decreases to 0.858. When both types of representations are removed, the PCC of our model further decreases to 0.848. The results indicate that both character-level representation and entity-level representation are supplementary to BERT. Although the improvements individually from entity I and character-level text snippet representation are more remarkable than entity II, the improvement from the combination of entity I and character-level representation is much smaller than the combination of entity II and character-level representation. It is because both character-level representation and entity I come from text snippets, whereas entity II comes from external KG. The diversity between character-level representation and entity II is much larger than that between character-level representation and entity I. It is interesting that our model is not further improved when both entity I and entity II are considered in our model at the same time, which may be also because of the diversity.</p>
      <p>Moreover, we investigate the effect of the domain-specific pretrained BERT models [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] on our model. We replace the pretrained BERT model in the general domain, [BERT-Base, Uncased] [<xref ref-type="bibr" rid="ref44">44</xref>], with the pretrained BERT model in the clinical domain [<xref ref-type="bibr" rid="ref45">45</xref>] to obtain a new model. The highest PCC of the new model is 0.872, which is slightly better than our previous model, indicating that the domain-specific pretrained BERT model is beneficial to our model.</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Pearson correlation coefficient of our model on the test set.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="470"/>
          <col width="500"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Model and setting</td>
              <td>PCC<sup>a</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="2">
                <bold>Our model</bold>
              </td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td rowspan="3">
                <break/>
              </td>
              <td>Entity I</td>
              <td>0.861</td>
            </tr>
            <tr valign="top">
              <td>Entity II</td>
              <td>0.868<sup>b</sup></td>
            </tr>
            <tr valign="top">
              <td>Entity I + Entity II</td>
              <td>0.862</td>
            </tr>
            <tr valign="top">
              <td colspan="2">
                <bold>Without</bold>
                <bold>character</bold>
                <bold>-level text snippet representation</bold>
              </td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td rowspan="2">
                <break/>
              </td>
              <td>Entity I</td>
              <td>0.859</td>
            </tr>
            <tr valign="top">
              <td>Entity II</td>
              <td>0.854</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Without entity-level representation</td>
              <td>0.858</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Without both</td>
              <td>0.848</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>PCC: Pearson correlation coefficient.</p>
          </fn>
          <fn id="table3fn2">
            <p><sup>b</sup>The highest PCC.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Error Analysis</title>
        <p>Although the proposed model achieves competitive performance, there are also some errors. To analyze these errors, we look into samples for which the difference between the predicted STS score and gold-standard similarity score is greater than 1.0 and find that the main errors can be classified into 2 types.</p>
        <p>The first type of error is related to polarity of clinical text snippets as our model is insensitive to positive and negative words. For example, as shown in <xref ref-type="table" rid="table4">Table 4</xref>, because both clinical text snippets in example 1 depict coughing up, their STS score predicted by our model is 2.5, but their gold-standard STS score is 1.0 as the polarity of the first text snippet is positive, whereas that of the second text snippet is negative. The second type of error is related to prescriptions that include medication names, usages, and dosages. For example, the gold-standard STS score of example 2 in <xref ref-type="table" rid="table4">Table 4</xref> is 1.0 as the medications in the 2 text snippets are completely different, but the STS score of the example predicted by our model is 2.5 as some other words are the same in the 2 text snippets. Because our model cannot extract medical information comprehensively, many errors of the second type occur. For further improvement, we need a comprehensive information extraction module to extract polarity information and medications with usage and dosage attributes besides the current 9 kinds of clinical entities. A possible way is to integrate the existing tools specifically for polarity information extraction (such as SenticNet [<xref ref-type="bibr" rid="ref47">47</xref>]) or medication extraction (such as MedEx [<xref ref-type="bibr" rid="ref48">48</xref>]) into our model. We also find that the scores of mispredictions are close to 2.5, which may be caused by the different STS score distributions of the training and test sets. As shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>, the STS scores of most sentence pairs in the training set concentrate in [2.5, 3.5], whereas those in the test set concentrate in [0.5, 1.5]. The difference is remarkable. It is reasonable to obtain the STS scores of mispredictions around the average score of the training set.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Examples of errors on the test set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="800"/>
            <thead>
              <tr valign="top">
                <td>Number</td>
                <td>Example</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p><italic>Sentence 1:</italic><italic>respiratory: positive for coughing up mucus (phlegm), dyspnea and wheezing</italic>.</p>
                    </list-item>
                    <list-item>
                      <p><italic>Sentence 2: negative for coughing up blood and dry cough</italic>.</p>
                    </list-item>
                    <list-item>
                      <p>Gold-standard: 1.0</p>
                    </list-item>
                    <list-item>
                      <p>Predicted: 2.5</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>
                        <italic>Sentence 1: ibuprofen [motrin] 800 mg tablet 1 tablet by mouth four time a day as needed.</italic>
                      </p>
                    </list-item>
                    <list-item>
                      <p>
                        <italic>Sentence 2: lisinopril 10 mg tablet 1 tablet by mouth one time daily.</italic>
                      </p>
                    </list-item>
                    <list-item>
                      <p>Gold-standard: 1.0</p>
                    </list-item>
                    <list-item>
                      <p>Predicted: 2.4</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Similarity interval distribution in the training and test data sets.</p>
          </caption>
          <graphic xlink:href="medinform_v8i12e23357_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Effect of Entity-Level Representation</title>
        <p>Although the results in <xref ref-type="table" rid="table3">Table 3</xref> show that any one of the 2 entity-level representations enhances the BERT-based model, some limitations also exist. In the case of entity I, we only consider type semantic information, but no entity semantic information. In the case of entity II, only about 20% (220/1080) of clinical entities recognized by cTAKES [<xref ref-type="bibr" rid="ref42">42</xref>] can be mapped to Mesh via dictionary look-up. There are 2 directions for improvement: (1) introduce entity semantic information into entity I, and (2) improve entity mapping performance in entity II and find a larger KG instead of Mesh.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this paper, we propose an enhanced BERT-based model for ClinicalSTS by introducing a character-level representation and an entity-level representation. Experiments on the 2019 n2c2/OHNLP track on ClinicalSTS indicate that both the character-level representation and the entity-level representation can enhance the BERT-based ClinicalSTS model, and our enhanced BERT-based model achieves competitive performance with other state-of-the-art models. In addition, domain-specific pretrained BERT models are better than general pretrained BERT models.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representation from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ClinicalSTS</term>
          <def>
            <p>clinical semantic textual similarity</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">KG</term>
          <def>
            <p>knowledge graph</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MLP</term>
          <def>
            <p>multilayer perceptron</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">OHNLP</term>
          <def>
            <p>Open Health Natural Language Processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">OOV</term>
          <def>
            <p>out of vocabulary</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">PCC</term>
          <def>
            <p>Pearson correlation coefficient</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SemEval</term>
          <def>
            <p>Semantic Evaluation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">STS</term>
          <def>
            <p>semantic textual similarity</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This paper is supported in part by grants: National Natural Science Foundations of China (U1813215, 61876052, and 61573118), Special Foundation for Technology Research Program of Guangdong Province (2015B010131010), National Natural Science Foundations of Guangdong, China (2019A1515011158), Guangdong Province Covid-19 Pandemic Control Research Fund (2020KZDZX1222), Strategic Emerging Industry Development Special Funds of Shenzhen (JCYJ20180306172232154 and JCYJ20170307150528934), and Innovation Fund of Harbin Institute of Technology (HIT.NSRIF.2017052).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Electronic Health Records: Then, Now, and in the Future</article-title>
          <source>Yearb Med Inform</source>
          <year>2016</year>
          <month>05</month>
          <day>20</day>
          <volume>Suppl 1</volume>
          <fpage>S48</fpage>
          <lpage>61</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.15265/IYS-2016-s006"/>
          </comment>
          <pub-id pub-id-type="doi">10.15265/IYS-2016-s006</pub-id>
          <pub-id pub-id-type="medline">27199197</pub-id>
          <pub-id pub-id-type="pii">me2016-s006</pub-id>
          <pub-id pub-id-type="pmcid">PMC5171496</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Markel</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Copy and paste of electronic health records: a modern medical illness</article-title>
          <source>Am J Med</source>
          <year>2010</year>
          <month>05</month>
          <volume>123</volume>
          <issue>5</issue>
          <fpage>e9</fpage>
          <pub-id pub-id-type="doi">10.1016/j.amjmed.2009.10.012</pub-id>
          <pub-id pub-id-type="medline">20399309</pub-id>
          <pub-id pub-id-type="pii">S0002-9343(09)01104-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kettl</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>A Piece of My Mind</article-title>
          <source>JAMA</source>
          <year>1992</year>
          <month>02</month>
          <day>12</day>
          <volume>267</volume>
          <issue>6</issue>
          <fpage>798</fpage>
          <pub-id pub-id-type="doi">10.1001/jama.1992.03480060040014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Toti</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Morley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Folarin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kartoglu</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Agrawal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stringer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gale</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gorrell</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Broadbent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>SemEHR: A general-purpose semantic search system to surface semantic data from clinical notes for tailored care, trial recruitment, and clinical research</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>05</month>
          <day>01</day>
          <volume>25</volume>
          <issue>5</issue>
          <fpage>530</fpage>
          <lpage>537</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29361077"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx160</pub-id>
          <pub-id pub-id-type="medline">29361077</pub-id>
          <pub-id pub-id-type="pii">4817428</pub-id>
          <pub-id pub-id-type="pmcid">PMC6019046</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hanauer</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Murkowski-Steffy</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Vydiswaran</surname>
              <given-names>VV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Development and empirical user-centered evaluation of semantically-based query recommendation for an electronic health record search engine</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>03</month>
          <volume>67</volume>
          <fpage>1</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30017-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.01.013</pub-id>
          <pub-id pub-id-type="medline">28131722</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30017-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5378386</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Plaza</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Díaz</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Hopfe</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rezgui</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Métais</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Preece</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Retrieval of Similar Electronic Health Records Using UMLS Concept Graphs</article-title>
          <source>Natural Language Processing and Information Systems. NLDB 2010. Lecture Notes in Computer Science, vol 6177</source>
          <year>2010</year>
          <publisher-loc>Berlin, Germany</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>293</fpage>
          <lpage>303</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Simpson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Antieau</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cimino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ely</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>AskHERMES: An online question answering system for complex clinical questions</article-title>
          <source>J Biomed Inform</source>
          <year>2011</year>
          <month>04</month>
          <volume>44</volume>
          <issue>2</issue>
          <fpage>277</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(11)00006-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2011.01.004</pub-id>
          <pub-id pub-id-type="medline">21256977</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(11)00006-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC3433744</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Answer extraction, semantic clustering, and extractive summarization for clinical question answering</article-title>
          <source>Proceedings of the 21st International Conference on Computational Linguistics and the 44th Annual Meeting of the Association for Computational Linguistics Association for Computational Linguistics</source>
          <year>2006</year>
          <month>7</month>
          <conf-name>21st International Conference on Computational Linguistics and the 44th Annual Meeting of the Association for Computational Linguistics Association for Computational Linguistics</conf-name>
          <conf-date>2006</conf-date>
          <conf-loc>Sydney, Australia</conf-loc>
          <fpage>841</fpage>
          <lpage>848</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.3115/1220175.1220281"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/1220175.1220281</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stubbs</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Filannino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Soysal</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>Ö</given-names>
            </name>
          </person-group>
          <article-title>Cohort selection for clinical trials: n2c2 2018 shared task track 1</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>11</month>
          <day>01</day>
          <volume>26</volume>
          <issue>11</issue>
          <fpage>1163</fpage>
          <lpage>1171</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31562516"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz163</pub-id>
          <pub-id pub-id-type="medline">31562516</pub-id>
          <pub-id pub-id-type="pii">5575392</pub-id>
          <pub-id pub-id-type="pmcid">PMC6798568</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agirre</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Diab</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Agirre</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Semeval-2012 task 6: A pilot on semantic textual similarity</article-title>
          <source>SemEval '12: Proceedings of the First Joint Conference on Lexical and Computational Semantics - Volume 1: Proceedings of the main conference and the shared task, and Volume 2: Proceedings of the Sixth International Workshop on Semantic Evaluation</source>
          <year>2012</year>
          <month>6</month>
          <day>7</day>
          <conf-name>SemEval '12</conf-name>
          <conf-date>2012</conf-date>
          <conf-loc>Montréal, Canada</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>ACM</publisher-name>
          <fpage>385</fpage>
          <lpage>393</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/2387636.2387697"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/s17-2001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agirre</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Diab</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Agirre</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>SEM 2013 shared task: Semantic Textual Similarity</article-title>
          <year>2013</year>
          <month>6</month>
          <day>13</day>
          <conf-name>Human Language Technology: Conference of the North American Chapter of the Association of Computational Linguistics (HLT-NAACL)</conf-name>
          <conf-date>2013</conf-date>
          <conf-loc>Atlanta, GA</conf-loc>
          <fpage>32</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/s17-2001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agirre</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Banea</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cardie</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Diab</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Agirre</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Mihalcea</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rigau</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wiebe</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>SemEval-2014 Task 10: Multilingual Semantic Textual Similarity</article-title>
          <year>2014</year>
          <month>8</month>
          <day>23</day>
          <conf-name>International Conference on Computational Linguistics (COLING)</conf-name>
          <conf-date>August 23-24, 2014</conf-date>
          <conf-loc>Dublin, Ireland</conf-loc>
          <fpage>81</fpage>
          <lpage>91</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/s14-2010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agirre</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Banea</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cardie</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Diab</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Agirre</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez-Gazpio</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Maritxalar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mihalcea</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>SemEval-2015 Task 2: Semantic Textual Similarity, English, Spanish and Pilot on Interpretability</article-title>
          <year>2015</year>
          <month>6</month>
          <day>4</day>
          <conf-name>Human Language Technology: Conference of the North American Chapter of the Association of Computational Linguistics (HLT-NAACL)</conf-name>
          <conf-date>June 4-5, 2015</conf-date>
          <conf-loc>Denver, CO</conf-loc>
          <fpage>252</fpage>
          <lpage>263</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/S15-2045</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agirre</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Banea</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Diab</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>González-Agirre</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mihalcea</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rigau</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wiebe</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>SemEval-2016 Task 1: Semantic Textual Similarity, Monolingual and Cross-Lingual Evaluation</article-title>
          <year>2016</year>
          <month>6</month>
          <day>2</day>
          <conf-name>Human Language Technology: Conference of the North American Chapter of the Association of Computational Linguistics (HLT-NAACL)</conf-name>
          <conf-date>June 16-17, 2016</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <fpage>497</fpage>
          <lpage>511</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/s16-1081</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>MedSTS: a resource for clinical semantic textual similarity</article-title>
          <source>Lang Resources &#38; Evaluation</source>
          <year>2018</year>
          <month>10</month>
          <day>24</day>
          <volume>54</volume>
          <issue>1</issue>
          <fpage>57</fpage>
          <lpage>72</lpage>
          <pub-id pub-id-type="doi">10.1007/s10579-018-9431-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Overview of the BioCreative/OHNLP challenge 2018 Task 2: Clinical Semantic Textual Similarity</article-title>
          <source>Proceedings of the BioCreative/OHNLP Challenge 2018</source>
          <year>2018</year>
          <conf-name>BioCreative/OHNLP Challenge 2018</conf-name>
          <conf-date>December, 2018</conf-date>
          <conf-loc>Washington, DC</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3233547.3233672</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The 2019 n2c2/OHNLP Track on Clinical Semantic Textual Similarity: Overview</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>11</month>
          <day>27</day>
          <volume>8</volume>
          <issue>11</issue>
          <fpage>e23375</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/11/e23375/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23375</pub-id>
          <pub-id pub-id-type="medline">33245291</pub-id>
          <pub-id pub-id-type="pii">v8i11e23375</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</article-title>
          <year>2019</year>
          <conf-name>Human Language Technology: Conference of the North American Chapter of the Association of Computational Linguistics (HLT-NAACL)</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/n19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipscomb</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Medical Subject Headings (MeSH)</article-title>
          <source>Bull Med Libr Assoc</source>
          <year>2000</year>
          <month>07</month>
          <volume>88</volume>
          <issue>3</issue>
          <fpage>265</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/10928714"/>
          </comment>
          <pub-id pub-id-type="medline">10928714</pub-id>
          <pub-id pub-id-type="pmcid">PMC35238</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mueller</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Thyagarajan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Siamese Recurrent Architectures for Learning Sentence Similarity</article-title>
          <source>Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence</source>
          <year>2016</year>
          <conf-name>Thirtieth AAAI Conference on Artificial Intelligence</conf-name>
          <conf-date>February 12-17, 2016</conf-date>
          <conf-loc>Phoenix, AZ</conf-loc>
          <publisher-loc>Palo Alto, CA</publisher-loc>
          <publisher-name>AAAI Press</publisher-name>
          <fpage>2786</fpage>
          <lpage>2792</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Neculoiu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Versteegh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rotaru</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Learning text similarity with siamese recurrent networks</article-title>
          <year>2016</year>
          <month>8</month>
          <day>11</day>
          <conf-name>1st Workshop on Representation Learning for NLP, RepL4NLP@ACL 2016</conf-name>
          <conf-date>August 11, 2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <fpage>148</fpage>
          <lpage>157</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w16-1617</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Convolutional Neural Network Architectures for Matching Natural Language Sentences</article-title>
          <year>2014</year>
          <month>12</month>
          <day>8</day>
          <conf-name>Neural Information Processing Systems (NeurIPS)</conf-name>
          <conf-date>December 8-13, 2014</conf-date>
          <conf-loc>Montreal, Quebec, Canada</conf-loc>
          <fpage>2042</fpage>
          <lpage>2050</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://papers.nips.cc/paper/5550-convolutional-neural-network-architectures-for-matching-natural-language-sentences.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Medical Question Retrieval Based on Siamese Neural Network Transfer Learning Method</article-title>
          <source>Database Systems for Advanced Applications</source>
          <year>2019</year>
          <month>4</month>
          <day>24</day>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
          <fpage>49</fpage>
          <lpage>64</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Conneau</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kiela</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schwenk</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Barrault</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bordes</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Supervised learning of universal sentence representations from natural language inference data</article-title>
          <source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2017</year>
          <month>9</month>
          <conf-name>2017 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>September 7-11, 2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <pub-id pub-id-type="doi">10.18653/v1/d17-1070</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gimpel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Multi-Perspective Sentence Similarity Modeling with Convolutional Neural Networks</article-title>
          <source>Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing, EMNLP 2015</source>
          <year>2015</year>
          <month>9</month>
          <day>17</day>
          <conf-name>Conference on Empirical Methods in Natural Language Processing (EMNLP 2015)</conf-name>
          <conf-date>September 17-21, 2015</conf-date>
          <conf-loc>Lisbon, Portugal</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>1576</fpage>
          <lpage>1586</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d15-1181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kwak</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Semantic Sentence Matching with Densely-Connected Recurrent and Co-Attentive Information</article-title>
          <source>Proceedings of the AAAI Conference on Artificial Intelligence</source>
          <year>2019</year>
          <month>07</month>
          <day>17</day>
          <conf-name>AAAI Conference on Artificial Intelligence</conf-name>
          <conf-date>January 27 to February 1, 2019</conf-date>
          <conf-loc>Honolulu, HI</conf-loc>
          <fpage>6586</fpage>
          <lpage>6593</lpage>
          <pub-id pub-id-type="doi">10.1609/aaai.v33i01.33016586</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Schütze</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>ABCNN: Attention-Based Convolutional Neural Network for Modeling Sentence Pairs</article-title>
          <source>TACL</source>
          <year>2016</year>
          <month>12</month>
          <volume>4</volume>
          <fpage>259</fpage>
          <lpage>272</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00097</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hamza</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Florian</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Bilateral Multi-Perspective Matching for Natural Language Sentences</article-title>
          <source>Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence (IJCAI)</source>
          <year>2017</year>
          <month>8</month>
          <day>19</day>
          <conf-name>Twenty-Sixth International Joint Conference on Artificial Intelligence (IJCAI)</conf-name>
          <conf-date>August 19-25, 2017</conf-date>
          <conf-loc>Melbourne, Australia</conf-loc>
          <fpage>4144</fpage>
          <lpage>4150</lpage>
          <pub-id pub-id-type="doi">10.24963/ijcai.2017/579</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Pairwise Word Interaction Modeling with Deep Neural Networks for Semantic Similarity Measurement</article-title>
          <source>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2016</year>
          <month>6</month>
          <day>12</day>
          <conf-name>Human Language Technology: Conference of the North American Chapter of the Association of Computational Linguistics (HLT-NAACL)</conf-name>
          <conf-date>June 12-17, 2016</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>937</fpage>
          <lpage>948</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/n16-1108</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Multiway Attention Networks for Modeling Sentence Pairs</article-title>
          <source>Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence</source>
          <year>2018</year>
          <month>7</month>
          <day>13</day>
          <conf-name>Twenty-Seventh International Joint Conference on Artificial Intelligence</conf-name>
          <conf-date>July 13-19, 2018</conf-date>
          <conf-loc>Stockholm, Sweden</conf-loc>
          <fpage>4411</fpage>
          <lpage>4417</lpage>
          <pub-id pub-id-type="doi">10.24963/ijcai.2018/613</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Natural Language Inference over Interaction Space</article-title>
          <year>2018</year>
          <month>4</month>
          <day>30</day>
          <conf-name>6th International Conference on Learning Representations, ICLR 2018</conf-name>
          <conf-date>April 30 - May 3, 2018</conf-date>
          <conf-loc>Vancouver, BC, Canada</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/forum?id=r1dHXnH6-"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carbonell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>XLNet: Generalized Autoregressive Pretraining for Language Understanding</article-title>
          <year>2019</year>
          <month>12</month>
          <day>8</day>
          <conf-name>Neural Information Processing Systems (NeurIPS), 2019</conf-name>
          <conf-date>December 8-14, 2019</conf-date>
          <conf-loc>Vancouver, BC, Canada</conf-loc>
          <fpage>5754</fpage>
          <lpage>5764</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://papers.nips.cc/paper/8812-xlnet-generalized-autoregressive-pretraining-for-language-understanding.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <source>RoBERTa: A Robustly Optimized BERT Pretraining Approach</source>
          <year>2019</year>
          <month>7</month>
          <day>26</day>
          <access-date>2019-07-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692">https://arxiv.org/abs/1907.11692</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Automatic de-identification of electronic medical records using token-level and character-level conditional random fields</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58 Suppl</volume>
          <fpage>S47</fpage>
          <lpage>52</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00119-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.06.009</pub-id>
          <pub-id pub-id-type="medline">26122526</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00119-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4988843</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A Deep Learning-Based System for PharmaCoNER</article-title>
          <source>Proceedings of The 5th Workshop on BioNLP Open Shared Tasks, BioNLP-OST@EMNLP-IJCNLP 2019</source>
          <year>2019</year>
          <month>11</month>
          <day>4</day>
          <conf-name>5th Workshop on BioNLP Open Shared Tasks, BioNLP-OST@EMNLP-IJCNLP 2019</conf-name>
          <conf-date>November 4, 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>33</fpage>
          <lpage>37</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d19-5706</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hattori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Di</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Character-Based LSTM-CRF with Radical-Level Features for Chinese Named Entity Recognition</article-title>
          <year>2016</year>
          <month>12</month>
          <day>2</day>
          <conf-name>Natural Language Understanding and Intelligent Applications - 5th CCF Conference on Natural Language Processing and Chinese Computing, NLPCC 2016, and 24th International Conference on Computer Processing of Oriental Languages, ICCPOL 2016</conf-name>
          <conf-date>December 2-6, 2016</conf-date>
          <conf-loc>Kunming, China</conf-loc>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
          <fpage>239</fpage>
          <lpage>250</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-50496-4_20</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Multi-task Character-Level Attentional Networks for Medical Concept Normalization</article-title>
          <source>Neural Process Lett</source>
          <year>2018</year>
          <month>6</month>
          <day>18</day>
          <volume>49</volume>
          <issue>3</issue>
          <fpage>1239</fpage>
          <lpage>1256</lpage>
          <pub-id pub-id-type="doi">10.1007/s11063-018-9873-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Improving Coreference Resolution by Learning Entity-Level Distributed Representations</article-title>
          <source>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics, ACL 2016</source>
          <year>2016</year>
          <month>8</month>
          <day>7</day>
          <conf-name>54th Annual Meeting of the Association for Computational Linguistics, ACL 2016</conf-name>
          <conf-date>August 7-12, 2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>The Association for Computational Linguistics</publisher-name>
          <pub-id pub-id-type="doi">10.18653/v1/p16-1061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Representation Learning of EHR Data via Graph-Based Medical Entity Embedding</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <month>10</month>
          <day>7</day>
          <access-date>2019-10-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1910.02574">https://arxiv.org/abs/1910.02574</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Design Challenges and Misconceptions in Neural Sequence Labeling</article-title>
          <source>Proceedings of the 27th International Conference on Computational Linguistics, COLING 2018</source>
          <year>2018</year>
          <month>8</month>
          <day>20</day>
          <conf-name>27th International Conference on Computational Linguistics, COLING 2018</conf-name>
          <conf-date>August 20-26, 2018</conf-date>
          <conf-loc>Santa Fe, NM</conf-loc>
          <fpage>3879</fpage>
          <lpage>3889</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/C18-1327/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Entity recognition from clinical texts via recurrent neural network</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>07</month>
          <day>05</day>
          <volume>17</volume>
          <issue>Suppl 2</issue>
          <fpage>67</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0468-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0468-7</pub-id>
          <pub-id pub-id-type="medline">28699566</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0468-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5506598</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <source>Apache cTAKES™ - clinical Text Analysis and Knowledge Extraction System</source>
          <access-date>2020-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ctakes.apache.org/">https://ctakes.apache.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bordes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Usunier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia-Duran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yakhnenko</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Translating Embeddings for Modeling Multi-relational Data</article-title>
          <source>Advances in Neural Information Processing Systems</source>
          <year>2013</year>
          <month>12</month>
          <day>5</day>
          <conf-name>27th Annual Conference on Neural Information Processing Systems 2013</conf-name>
          <conf-date>December 5-8, 2013</conf-date>
          <conf-loc>Lake Tahoe, NV</conf-loc>
          <fpage>2787</fpage>
          <lpage>2795</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <source>Google Research: BERT</source>
          <year>2020</year>
          <access-date>2020-08-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/google-research/bert">https://github.com/google-research/bert</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Transfer Learning in Biomedical Natural Language Processing: An Evaluation of BERT and ELMo on Ten Benchmarking Datasets</article-title>
          <source>Proceedings of the 18th BioNLP Workshop and Shared Task, BioNLP@ACL 2019</source>
          <year>2019</year>
          <month>8</month>
          <day>1</day>
          <conf-name>18th BioNLP Workshop and Shared Task, BioNLP@ACL 2019</conf-name>
          <conf-date>August 1, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <fpage>58</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w19-5006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>1240</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Malheiros</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <source>senticnet: Access SenticNet data using Python</source>
          <access-date>2020-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/yurimalheiros/senticnetapi">https://github.com/yurimalheiros/senticnetapi</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stenner</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Waitman</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>MedEx: a medication information extraction system for clinical narratives</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <month>01</month>
          <day>01</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1197/jamia.m3378</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
