<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i4e14850</article-id>
      <article-id pub-id-type="pmid">31719024</article-id>
      <article-id pub-id-type="doi">10.2196/14850</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Combining Contextualized Embeddings and Prior Knowledge for Clinical Named Entity Recognition: Evaluation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Li</surname>
            <given-names>Fang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Polepalli Ramesh</surname>
            <given-names>Balaji</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>Min</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Eli Lilly and Company</institution>
            <addr-line>893 Delaware St</addr-line>
            <addr-line>Indianapolis, IN</addr-line>
            <country>United States</country>
            <phone>1 615 926 8277</phone>
            <email>jiang_min@lilly.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1407-0961</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Sanger</surname>
            <given-names>Todd</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1020-6374</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Xiong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4522-7228</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Eli Lilly and Company</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Min Jiang <email>jiang_min@lilly.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Oct-Dec</season>
        <year>2019</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>13</day>
        <month>11</month>
        <year>2019</year>
      </pub-date>
      <volume>7</volume>
      <issue>4</issue>
      <elocation-id>e14850</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>5</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>18</day>
          <month>6</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>16</day>
          <month>7</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>10</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Min Jiang, Todd Sanger, Xiong Liu. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 13.11.2019.</copyright-statement>
      <copyright-year>2019</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2019/4/e14850/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Named entity recognition (NER) is a key step in clinical natural language processing (NLP). Traditionally, rule-based systems leverage prior knowledge to define rules to identify named entities. Recently, deep learning–based NER systems have become more and more popular. Contextualized word embedding, as a new type of representation of the word, has been proposed to dynamically capture word sense using context information and has proven successful in many deep learning–based systems in either general domain or medical domain. However, there are very few studies that investigate the effects of combining multiple contextualized embeddings and prior knowledge on the clinical NER task.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to improve the performance of NER in clinical text by combining multiple contextual embeddings and prior knowledge.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>In this study, we investigate the effects of combining multiple contextualized word embeddings with classic word embedding in deep neural networks to predict named entities in clinical text. We also investigate whether using a semantic lexicon could further improve the performance of the clinical NER system.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>By combining contextualized embeddings such as ELMo and Flair, our system achieves the F-1 score of 87.30% when only training based on a portion of the 2010 Informatics for Integrating Biology and the Bedside NER task dataset. After incorporating the medical lexicon into the word embedding, the F-1 score was further increased to 87.44%. Another finding was that our system still could achieve an F-1 score of 85.36% when the size of the training data was reduced to 40%.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Combined contextualized embedding could be beneficial for the clinical NER task. Moreover, the semantic lexicon could be used to further improve the performance of the clinical NER system.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>named entity recognition</kwd>
        <kwd>deep learning</kwd>
        <kwd>contextualized word embedding</kwd>
        <kwd>semantic embedding</kwd>
        <kwd>prior knowledge</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>History of Clinical Named Entity Recognition</title>
        <p>
            
            Clinical named entity recognition (NER), an important clinical natural language processing (NLP) task, has been explored for several decades. In the early stage, most NER systems leverage rules and dictionaries to represent linguistic features and domain knowledge to identify clinical entities, such as MedLEE [<xref ref-type="bibr" rid="ref1">1</xref>], SymText/MPlus [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>], MetaMap [<xref ref-type="bibr" rid="ref4">4</xref>], KnowledgeMap [<xref ref-type="bibr" rid="ref5">5</xref>], cTAKES [<xref ref-type="bibr" rid="ref6">6</xref>], and HiTEX [<xref ref-type="bibr" rid="ref7">7</xref>]. To promote the development of machine learning–based system, many publicly available corpora have been developed by organizers of some clinical NLP challenges such as the Informatics for Integrating Biology and the Bedside (i2b2) 2009 [<xref ref-type="bibr" rid="ref8">8</xref>], 2010 [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref13">13</xref>], 2012 [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref18">18</xref>], 2014 [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref23">23</xref>], ShARe/CLEF eHealth Evaluation Lab 2013 dataset [<xref ref-type="bibr" rid="ref24">24</xref>], and Semantic Evaluation 2014 task 7 [<xref ref-type="bibr" rid="ref25">25</xref>], 2015 task 6 [<xref ref-type="bibr" rid="ref26">26</xref>], 2015 task 14 [<xref ref-type="bibr" rid="ref27">27</xref>], and 2016 task 12 [<xref ref-type="bibr" rid="ref28">28</xref>] datasets. Many machine learning–based clinical NER systems have been proposed, and they greatly improved performance compared with the early rule-based systems [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. 
Most systems are implemented based on two types of supervised machine learning algorithms: (1) classification algorithms such as support vector machines (SVMs) and (2) sequence labeling algorithms such as conditional random fields (CRFs), hidden Markov models (HMMs), and structural support vector machines (SSVMs). Among all of the algorithms, CRFs play the leading roles due to the advantage of the sequence labeling algorithms over classification algorithms in considering context information when making the prediction; CRFs, as one type of discriminative model, tend to achieve better performance for the same source of testing data compared with generative model-based algorithms such as HMMs. Even though CRFs have achieved a huge success in the clinical NER area, they have some obvious limitations: CRF-based systems rely on manually crafted features, which are time consuming, and their ability to capture context in a large window is limited.</p>
      </sec>
      <sec>
        <title>Deep Neural Network–Based Named Entity Recognition Algorithms</title>
        <p>In recent years, deep neural network–based NER algorithms have been extensively studied, and many deep learning–based clinical NER systems have been proposed. They have an obvious advantage over traditional machine learning algorithms since they do not require feature engineering, which is the most difficult part of designing machine learning–based systems. They also improve the ability to leverage the context information. Initially, word embedding [<xref ref-type="bibr" rid="ref31">31</xref>] is proposed as a method to represent the word in a continuous way to better support neural network structure. Then several new neural network structures including recurrent neural networks (RNNs) and long short-term memory (LSTM) [<xref ref-type="bibr" rid="ref32">32</xref>] have been introduced to better represent sequence-based input and overcome long-term dependency issues. Recently, contextual word representations generated from pretrained bidirectional language models (biLMs) have been shown to significantly improve the performance of state-of-the-art NER systems [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        <p>In biLMs, the language model (LM) can be described as: given a sequence of N tokens, (<italic>t<sub>1</sub></italic>, <italic>t<sub>2</sub></italic>, ..., <italic>t<sub>N</sub></italic>), the probability of token <italic>t<sub>k</sub></italic> can be calculated given the history (<italic>t<sub>1</sub></italic>, ..., <italic>t<sub>k–1</sub></italic>), and the sequence probability can be computed as seen in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <p>Recent neural LMs usually include one layer of token input, which is represented by word embedding or a CNN over characters, followed by L layers of forward LSTMs. On the top layer, the SoftMax layer is added to generate a prediction score for the next token [<xref ref-type="bibr" rid="ref33">33</xref>]. The biLM combines two such neural LMs: the forward LM and backward LM; the backward LM is similar to the forward LM, except it runs over the reverse sequence. As a whole, the biLM tries to maximize the log-likelihood of the forward and backward directions as seen in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Sequence probability in bidirectional language models.</p>
          </caption>
          <graphic xlink:href="medinform_v7i4e14850_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Log-likelihood of the forward and backward directions language models.</p>
          </caption>
          <graphic xlink:href="medinform_v7i4e14850_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Where θ<sub>x</sub> represents the token representation layer, θ<sub>s</sub> represents the SoftMax layer, and <inline-graphic xlink:href="medinform_v7i4e14850_fig6.png" mimetype="image" xlink:type="simple"/><italic><sub>LSTM</sub></italic> and <inline-graphic xlink:href="medinform_v7i4e14850_fig7.png" mimetype="image" xlink:type="simple"/><italic><sub>LSTM</sub></italic> represent the forward and backward directions of the LSTM layer.</p>
        <p>In 2017, Peters et al [<xref ref-type="bibr" rid="ref34">34</xref>] introduced a sequence tagger called TagLM that combines pretrained word embeddings and biLM embeddings as the representation of the word to improve the performance of the NER system. Since the output of each layer of the biLM represents a different type of contextual information [<xref ref-type="bibr" rid="ref35">35</xref>], Peters et al [<xref ref-type="bibr" rid="ref33">33</xref>] proposed another embedding, a deep contexualized word representation, ELMo, by concatenating all the biLM layer outputs into the biLM embedding with a weighted average pooling operation. The ELMo embedding adds CNN and highway networks over the character for each token as the input. ELMo has been proven to enhance the performance of different NLP tasks such as semantic role labeling and question answering [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        <p>Similar to Peters’ ELMo, Akbik et al [<xref ref-type="bibr" rid="ref36">36</xref>] introduced contextual string embeddings for sequence labeling, which leverages neural character-level language modeling to generate a contextualized embedding for each word input within a sentence. The principle of the character-level LM is that it is the same as biLMs except that it runs on the sequences of characters instead of tokens. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the architecture of extracting a contextual string embedding for the word “hypotensive” in a sentence. We can see that instead of generating a fixed representation of the embedding for each word, the embedding of each token is composed of pretrained character embeddings from surrounding text, meaning the same token has dynamic representation depending on its context.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Architecture of extracting a contextual string embedding.</p>
          </caption>
          <graphic xlink:href="medinform_v7i4e14850_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Deep Neural Network–Based Clinical Named Entity Recognition Systems</title>
        <p>In the clinical domain, researchers investigated the performance of clinical NER tasks on various types of deep neural network structures. In 2015, researchers showed it is beneficial to use the large clinical corpus to generate word embeddings for clinical NER systems, and they comparatively investigated the different ways of generating word embeddings in the clinical domain [<xref ref-type="bibr" rid="ref37">37</xref>]. In 2017, Wu et al [<xref ref-type="bibr" rid="ref38">38</xref>] produced state-of-the-art results on the i2b2 2010 NER task dataset by employing the LSTM-CRF structure. Liu et al [<xref ref-type="bibr" rid="ref39">39</xref>] investigated the effects of two types of character word embeddings on LSTM-based systems on multiple i2b2/Veterans Administration (VA) NER task datasets. In 2018, Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>] employed a contextualized LM embedding on clinical data and boosted the state-of-the-art performance by 3.4% on the i2b2/VA 2010 NER dataset. The above studies show that, with the development of methods in text representation learning, especially contextual word embedding, more and more hidden knowledge can be learned from a large unannotated clinical corpus, which is beneficial for clinical NER tasks. According to the study by Peters et al [<xref ref-type="bibr" rid="ref35">35</xref>], contextual word representations derived from pretrained biLMs can learn different levels of information that vary with the depth of the network, from local syntactic information to long-range dependent semantic information. Even without leveraging traditional domain knowledge such as lexicon and ontology, deep learning–based NER systems can achieve better performance than traditional machine learning–based systems.</p>
        <p>Besides using pretrained representation from large unlabeled corpora, researchers started to integrate prior knowledge into deep learning frameworks to improve the performance of the NER system. For example, in the general domain, Yu and Dredze [<xref ref-type="bibr" rid="ref41">41</xref>] created a semantic word embedding based on WordNet and evaluated the performance on language modeling, semantic similarity, and human judgment prediction. In another example, Weston et al [<xref ref-type="bibr" rid="ref42">42</xref>] leveraged a CNN to generate a semantic embedding based on hashtags to improve the performance of the document recommendation task. In the clinical domain, Wu et al [<xref ref-type="bibr" rid="ref43">43</xref>] compared two types of methods to inject medical knowledge into deep learning–based clinical NER solutions and found that the RNN-based system combining medical knowledge as embeddings achieved the best performance on the i2b2 2010 dataset. In 2019, Wang et al [<xref ref-type="bibr" rid="ref44">44</xref>] explored two different architectures that extend the bidirectional LSTM (biLSTM) neural network and five different feature representation schemes to incorporate the medical dictionaries. In addition, other studies also use prior knowledge to generate embeddings [<xref ref-type="bibr" rid="ref45">45</xref>-<xref ref-type="bibr" rid="ref49">49</xref>].</p>
        <p>To date, no detailed analysis has been published to investigate the value of combining different types of word embeddings and prior knowledge for clinical NER. In this study, we made the following contributions: (1) we proposed an innovative method to combine two types of contextualized embeddings to study their effects on the clinical NLP challenge dataset, (2) we incorporated prior knowledge from semantic resources such as medical lexicon to evaluate if it could further improve the performance of the clinical NER system, and (3) we conducted a thorough evaluation on our models with different sizes of data to gain knowledge on how much data are needed to train a high-performance clinical NER system.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Datasets</title>
        <p>For this study, we used two datasets, the 2010 i2b2/VA concept extraction track dataset and the Medical Information Mart for Intensive Care III (MIMIC-III) corpus. The 2010 i2b2/VA challenge dataset is annotated with named entities, while the MIMIC-III corpus is unannotated data.</p>
        <sec>
          <title>2010 i2b2/VA Concept Extraction Track Dataset</title>
          <p>The goal of the 2010 i2b2/VA concept extraction task is to identify three types of clinical named entities including problem, treatment, and test from clinical notes. The original dataset includes 349 notes in the training set and 477 notes in the testing set, which include discharge summaries and progress notes from three institutions: Partners HealthCare, Beth Israel Deaconess Medical Center, and University of Pittsburgh Medical Center. Since the University of Pittsburgh Medical Center’s data have been removed from the original data set, the portion of discharge summaries that is available contains 170 notes for training and 256 for testing. In total, the training set contains 16,523 concepts including 7073 problems, 4844 treatments, and 4606 tests. The test set contains 31,161 concepts including 12,592 problems, 9344 treatments, and 9225 tests.</p>
        </sec>
        <sec>
          <title>Medical Information Mart for Intensive Care III Corpus</title>
          <p>The MIMIC-III corpus [<xref ref-type="bibr" rid="ref50">50</xref>] is from MIMIC-III database, which is a large, freely available de-identified health-related dataset that integrates de-identified, comprehensive clinical data of patients admitted to the Beth Israel Deaconess Medical Center in Boston, Massachusetts.</p>
          <p>The dataset comprises 2,083,180 notes from 15 different note types including “rehab services,” “case management,” “general,” “discharge summary,” “consult,” “radiology,” “electrocardiography,” “nutrition,” “social work,” “pharmacy,” “echocardiography,” “physician,” “nursing,” “nursing/other,” and “respiratory.”</p>
        </sec>
      </sec>
      <sec>
        <title>Embedding Generation</title>
        <p>In order to fit our text input into the deep neural network structure, we generated three types of embeddings: (1) classic word embeddings, (2) contextualized LM–based word embeddings, and (3) semantic word embeddings.</p>
        <sec>
          <title>Training Classic Word Embeddings</title>
          <p>We generated two types of word embeddings based on the MIMIC-III corpus and a medical lexicon: MIMIC-III corpus-based embeddings and tagged MIMIC-III corpus-based embeddings. We adopted the Word2Vec implementation database from Github [<xref ref-type="bibr" rid="ref51">51</xref>] to train word embeddings based on the MIMIC-III corpus. We used a continuous bag-of-words architecture with negative sampling. In accordance with the results from the study by Xu et al [<xref ref-type="bibr" rid="ref52">52</xref>], we set the dimension of embedding as 50.</p>
        </sec>
        <sec>
          <title>Training Contextual Language Model–Based Embeddings</title>
          <p>Besides the word embeddings, we employed two recently proposed methods to generate contextual LM-based embeddings: (1) ELMo embeddings and (2) contextual string embeddings for sequence labeling (Flair).</p>
        </sec>
      </sec>
      <sec>
        <title>Training ELMo Embeddings</title>
        <p>We followed the method introduced by Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>] that uses a partial MIMIC-III corpus combined with a certain portion of Wikipedia pages as a training corpus to train the ELMo contextual LM in the clinical domain. In more detail, it combines discharge summaries and radiology reports from the MIMIC-III corpus and all the Wikipedia pages with titles that are items from the Systematized Nomenclature of Medicine–Clinical Terms. Such a corpus is trained on a deep neural network that contains a character-based CNN embedding layer followed by a two-layer biLSTM. Details have been published elsewhere [<xref ref-type="bibr" rid="ref40">40</xref>].</p>
      </sec>
      <sec>
        <title>Training Contextual String Embeddings for Sequence Labeling</title>
        <p>Akbik et al [<xref ref-type="bibr" rid="ref36">36</xref>] proposed a new method to generate a neural character-level LM. The paper shows the state-of-the-art performance on the Conference on Computational Natural Language Learning 2003 NER task dataset. The LM for the general domain is publicly accessible. The author also integrates all the codes into an NLP framework called Flair. It achieved great success on the data in the general domain. However, according to the research by Friedman et al [<xref ref-type="bibr" rid="ref53">53</xref>], clinical language has unique linguistic characteristics compared with general English, which make models generated from the public domain poorly adaptable to clinical narratives. It is therefore necessary to train the LM on the clinical corpus to better support the clinical NER task. For training corpus preparation, we first did sentence segmentation on the entire corpus, then we randomly selected 1500 sentences as the testing set and another 1500 sentences for the validation set. The remaining part serves as the training set. For the hyperparameters, we kept the default setting: learning rate as 20.0, batch size as 32, anneal factor as 0.25, patience as 10, clip as 0.25, and hidden size as 1024.</p>
      </sec>
      <sec>
        <title>Training Semantic Word Embeddings</title>
        <p>Injecting domain knowledge into the deep learning model is a potential way to further improve the performance of the NER system. According to the results by Wu et al [<xref ref-type="bibr" rid="ref43">43</xref>], combining medical knowledge into the embedding outperforms the method of representing it as a one-hot vector. Therefore, we similarly created the embedding to represent medical lexicon and fed it into the deep learning framework in our study. More specifically, we initially generated a lexicon dictionary based on a subset of semantic categories in the Unified Medical Language System. We then identified all the lexicon occurrences in the corpus using the dictionary and replaced them with semantic categories. <xref rid="figure4" ref-type="fig">Figure 4</xref> shows an example of the conversion. In the example sentence of “No spontaneous thrombus is seen in the left atrium,” “thrombus” is replaced with the tag “DISORDER” and “left atrium” is replaced with two “BODYLOC” tags. In this way, we can integrate semantic information into the word embeddings. For the embedding generation, we use the same setting as in the previous section.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>One example of converting the sentence into the tagged sentence.</p>
          </caption>
          <graphic xlink:href="medinform_v7i4e14850_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Deep Neural Network Architecture</title>
        <p>After we generated all the embeddings, we started to fit them as the input into our deep neural network for the supervised training stage. Since each type of embedding is generated using one method, meaning each represents different aspects of knowledge from the large corpus, combining them is an obvious solution to potentially further improve the performance, which has also been proven by clinical NER studies [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. Although there are many options to combine multiple embeddings in the deep neural network system such as weighting [<xref ref-type="bibr" rid="ref54">54</xref>] and ensemble [<xref ref-type="bibr" rid="ref55">55</xref>], in this study, we adopted the most straightforward way, which is simply concatenating them as the input.</p>
        <p>We used the biLSTM-CRF sequence labeling module proposed by Huang et al [<xref ref-type="bibr" rid="ref56">56</xref>]. <xref rid="figure5" ref-type="fig">Figure 5</xref> shows the architecture of the whole deep neural network structure; the input is the embedding layer, which is concatenated by different types of embeddings as described in the previous section. Before we extracted embeddings for tagged word embedding, we used the same medical lexicon–based tagger to replace the tokens with the semantic tags. All the embedding inputs went through the biLSTM layer to generate forward and backward output, which was used to calculate the probability score by CRF layers. On the top, the prediction was given by a SoftMax layer.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Deep neural network structure with combined embeddings. Bi-LSTM: bidirectional long short-term memory; CRF: conditional random field.</p>
          </caption>
          <graphic xlink:href="medinform_v7i4e14850_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Training the Deep Neural Network–Based Sequence Tagger</title>
        <p>For the implementation, we employed Flair [<xref ref-type="bibr" rid="ref57">57</xref>], which is a simple framework for NLP tasks including NER and text classification. We used the default hyperparameter setting in Flair, and we used the following configuration: learning rate as 0.1, batch size as 32, dropout probability as 0.5, and maximum epoch as 500. The learning rate annealing method is basically the same as the default: we halve the learning rate if the training loss does not fall for the consecutive “patience” number of epochs. We set the patience number to 12 in this study. A TITAN V (NVIDIA Corporation) graphics processing unit was used to train the model. It took about 4 hours to train our model each time.</p>
      </sec>
      <sec>
        <title>Evaluation</title>
        <p>In order to get more reliable results, we ran each model three times. For the measurement of each running, we used precision, recall, and F-1 score.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table1">Table 1</xref> shows the performance of the challenge winner system and different deep neural network systems. We used four benchmarks as our baseline systems, and then we reported the performance of the systems when adding ELMo embeddings, Flair embeddings, and tagged embeddings one at a time. All evaluation scores were based on exact matching. For the baseline systems, the first one is the semi-Markov model, developed by Debruijn et al [<xref ref-type="bibr" rid="ref13">13</xref>], which reported an F-1 score of 85.23%. The second and third baselines are both based on the LSTM model, and they reported F-1 scores of 85.78% and 85.94%, respectively. The last baseline is the best result for the nonensemble models from Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>], which used ELMo embedding. The three baseline systems used the original corpus (training: 349 notes; test: 477 notes), all other systems are based on the existing modified corpus (training: 170 notes; test: 256 notes). To start, we combined word embeddings with ELMo and Flair embeddings, respectively. Both models achieved an F-1 score of 87.01%, which is a little bit higher than what was reported by Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>]. After combining word embeddings with ELMo and Flair embeddings, the F-1 score increased to 87.30%. When the word embedding on the tagged corpus was incorporated, the performance was further improved to 87.44% for the F-1 score.</p>
      <p>In order to test if the improvement between different results is statistically significant, we conducted a statistical test based on results from bootstrapping. From the prediction result of the test set, we randomly selected 1000 sentences with replacement 100 times and generated 100 bootstrap data sets. For each bootstrap data set, we evaluated F-measures for three pairs of results: (1) “biLSTM + ELMo” and “biLSTM + ELMo + Flair,” (2) “biLSTM + ELMo + Flair” and “biLSTM + ELMo + Flair + semantic embedding,” and (3) “biLSTM + ELMo by Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>]” and “biLSTM + ELMo + Flair + semantic embedding.” After that, we adopted a Wilcoxon signed rank test [<xref ref-type="bibr" rid="ref58">58</xref>] to determine if the differences between F-measures from the three pairs were statistically significant. The results show that the improvements in F-measures for all three pairs were statistically significant (<italic>P</italic> values were .01, .02, and .03, respectively).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Performance of all the models on the 2010 i2b2/VA dataset.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="580"/>
          <col width="140"/>
          <col width="140"/>
          <col width="140"/>
          <thead>
            <tr valign="top">
              <td>Model</td>
              <td>F-1 (%)</td>
              <td>Precision (%)</td>
              <td>Recall (%)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Hidden semi-Markov<sup>a</sup></td>
              <td>85.23</td>
              <td>86.88</td>
              <td>83.64</td>
            </tr>
            <tr valign="top">
              <td>LSTM<sup>b</sup> by Liu et al [<xref ref-type="bibr" rid="ref39">39</xref>]<sup>a</sup></td>
              <td>85.78</td>
              <td>—<sup>c</sup></td>
              <td>—<sup>c</sup></td>
            </tr>
            <tr valign="top">
              <td>LSTM by Wu et al [<xref ref-type="bibr" rid="ref43">43</xref>]<sup>a</sup></td>
              <td>85.94</td>
              <td>85.33</td>
              <td>86.56</td>
            </tr>
            <tr valign="top">
              <td>BiLSTM<sup>d</sup> + ELMo by Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>]<sup>a</sup></td>
              <td>86.84 (0.16)</td>
              <td>87.44 (0.27)</td>
              <td>86.25 (0.26)</td>
            </tr>
            <tr valign="top">
              <td>BiLSTM + Flair</td>
              <td>87.01 (0.18)</td>
              <td>87.54 (0.15)</td>
              <td>86.49 (0.21)</td>
            </tr>
            <tr valign="top">
              <td>BiLSTM + ELMo</td>
              <td>87.01 (0.24)</td>
              <td>87.64 (0.19)</td>
              <td>86.40 (0.30)</td>
            </tr>
            <tr valign="top">
              <td>BiLSTM + ELMo + Flair</td>
              <td>87.30 (0.06)</td>
              <td>87.78 (0.09)</td>
              <td>86.85 (0.07)</td>
            </tr>
            <tr valign="top">
              <td>BiLSTM + ELMo + Flair + semantic embedding</td>
              <td>87.44 (0.07)</td>
              <td>88.03 (0.14)</td>
              <td>86.91 (0.10)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>Model is trained using the complete dataset of i2b2 2010, which contains 349 notes in the training set and 477 notes in the test set.</p>
          </fn>
          <fn id="table1fn2">
            <p><sup>b</sup>LSTM: long short-term memory.</p>
          </fn>
          <fn id="table1fn3">
            <p><sup>c</sup>Not reported.</p>
          </fn>
          <fn id="table1fn4">
            <p><sup>d</sup>BiLSTM: bidirectional LSTM.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>NER is a fundamental task in the clinical NLP domain. In this study, we investigated the effects of combinations of different types of embeddings on the NER task. We also explored how to use a medical lexicon to further improve performance. Based on the result, we found that either ELMo or Flair embeddings could boost the system’s performance, and combining both embeddings could further improve the performance. Although both ELMo and Flair embeddings use biLM to train the LM on the MIMIC-III corpus, they actually generate the contextualized word embeddings in different ways. ELMo concatenates all the biLM layers to represent all different levels of the knowledge, while Flair embedding is generated by a character-level LM. A character-level LM is different from a character-aware LM [<xref ref-type="bibr" rid="ref59">59</xref>] since the latter actually uses a word-level LM while leveraging character-level features through a CNN encoding step. The Flair embedding of a word is composed of the character-level embeddings of its surrounding text. The difference between ELMo and Flair embeddings could explain why they can play complementary roles in the model.</p>
        <p>The results show that adding semantic embeddings could further improve performance. According to the study by Peters et al [<xref ref-type="bibr" rid="ref35">35</xref>], the lower biLM layer specializes in local syntactic relationships, while the higher layers focus on modeling longer range relationships. Those relationships are learned from the pure clinical corpus without any resources from outside such as medical lexicons and ontologies. This study shows an effective way to incorporate domain knowledge into the deep neural network–based NER system.</p>
        <p>A large amount of training data is required to achieve success when applying deep learning algorithms [<xref ref-type="bibr" rid="ref60">60</xref>]. Compared with the general domain, it is more difficult to accumulate a large annotated corpus for most clinical NLP tasks since it usually requires the annotator to have in-depth domain knowledge. Contextualized word embeddings, as an effective way of transferring the knowledge from the large unlabeled corpus, could address the issue of lack of training data. According to the results, by using only a small training corpus (170 notes), contextualized word embedding–based models could achieve better performance than the models that used the larger training corpus (349 notes). To further investigate the effectiveness of transfer learning in our proposed models, we compared the performance of our best model generated from different sizes of the training data. <xref ref-type="table" rid="table2">Table 2</xref> shows the F-1 score for the model “biLSTM + ELMo + Flair + semantic embedding” on randomly selected 80%, 60%, 40%, 20%, and 10% of the training data. Surprisingly, we found that using only 40% of the training corpus could achieve performance comparable to the original state-of-the-art traditional machine learning–based system. Even using 20% of the training corpus, the model’s F-1 score is still more than 80%. This result indicates that contextualized word representation could potentially be an effective way to reduce the size of the training corpus, which could significantly improve the feasibility of applying deep learning to real practice.</p>
        <p>Besides the performance reported in the Results section, we also recorded the change of performance for our proposed models during the fine-tuning stage. <xref ref-type="table" rid="table3">Table 3</xref> shows the F-1 score on 1, 20, 40, and 60 epochs for our three models. At epoch 1, compared with using only word embeddings, adding any contextualized word embedding boosts the F-1 score. This is mostly because pretraining on contextualized word embeddings is very beneficial for the task of named entity recognition. This shows that the LM is a good way of pretraining that can be adapted to different downstream NLP tasks. Another interesting finding is that even though the ELMo model achieved the best performance among our three models at epoch 1, it was surpassed by the other two models on later epochs, which indicates that during the optimization process, the best starting point does not necessarily lead to the best local optimal solution.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Performance of the best model training, BiLSTM<sup>a</sup> + ELMo + Flair + semantic embedding, on different sizes of the training corpus.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Amount of training data (%)</td>
                <td>F-1 (%)</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>10</td>
                <td>71.13</td>
                <td>69.59</td>
                <td>72.74</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>82.05</td>
                <td>81.92</td>
                <td>82.18</td>
              </tr>
              <tr valign="top">
                <td>40</td>
                <td>85.36</td>
                <td>85.83</td>
                <td>84.90</td>
              </tr>
              <tr valign="top">
                <td>60</td>
                <td>86.33</td>
                <td>86.81</td>
                <td>85.86</td>
              </tr>
              <tr valign="top">
                <td>80</td>
                <td>86.92</td>
                <td>87.42</td>
                <td>86.43</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>BiLSTM: bidirectional long short-term memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>F-1 score for our proposed models on different epochs.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="480"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>1 epoch (%)</td>
                <td>20 epochs (%)</td>
                <td>40 epochs (%)</td>
                <td>60 epochs (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Classic word embedding</td>
                <td>61.23</td>
                <td>75.67</td>
                <td>78.11</td>
                <td>79.52</td>
              </tr>
              <tr valign="top">
                <td>Classic word embedding + ELMo</td>
                <td>76.18</td>
                <td>85.64</td>
                <td>85.68</td>
                <td>86.63</td>
              </tr>
              <tr valign="top">
                <td>Classic word embedding + ELMo + Flair</td>
                <td>73.28</td>
                <td>85.33</td>
                <td>85.97</td>
                <td>86.96</td>
              </tr>
              <tr valign="top">
                <td>Classic word embedding + ELMo + Flair + semantic embedding</td>
                <td>74.38</td>
                <td>85.85</td>
                <td>86.46</td>
                <td>87.13</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has some limitations. For contextualized embedding generation, we followed others’ research methods and did not test different configurations for LM training. For example, for ELMo embeddings, we followed the work of Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>], and for Flair embedding generation, we kept the same configuration as seen in the work by Akbik et al [<xref ref-type="bibr" rid="ref36">36</xref>]. For the fine-tuning stage, we only fine-tuned a limited set of hyperparameters including learning rate and patience. For domain knowledge integration, there are a lot of options that could be explored to merge the lexicon information into the input of the deep neural network structure. In this study, we only tried one way to represent it in the form of word embeddings. In this paper, we studied two contextualized embeddings: ELMo and Flair. In the future, we plan to test our framework by adding bidirectional encoder representations from transformers, which is another popular contextualized embedding [<xref ref-type="bibr" rid="ref61">61</xref>].</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we investigated the effects of the combination of two contextualized word embeddings including ELMo and Flair and clinical knowledge for the clinical NER task. Our evaluation on the 2010 i2b2/VA challenge dataset shows that using both ELMo and Flair embeddings outperforms using only ELMo embeddings, which indicates its great potential for clinical NLP research. Furthermore, we demonstrate that incorporating the medical lexicon into the word representation could further improve the performance. Finally, we found that adopting our best model would be an effective way to reduce the size of the required training corpus for the clinical NER task.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">biLM</term>
          <def>
            <p>bidirectional language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">biLSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HMM</term>
          <def>
            <p>hidden Markov model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">i2b2</term>
          <def>
            <p>Informatics for Integrating Biology and the Bedside</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LM</term>
          <def>
            <p>language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">MIMIC-III</term>
          <def>
            <p>Medical Information Mart for Intensive Care III</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">SSVM</term>
          <def>
            <p>structural support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">VA</term>
          <def>
            <p>Veterans Affairs</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by the Advanced Analytics and Data Science organization at Eli Lilly and Company.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alderson</surname>
              <given-names>PO</given-names>
            </name>
            <name name-style="western">
              <surname>Austin</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Cimino</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>A general natural-language text processor for clinical radiology</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>1994</year>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>161</fpage>
          <lpage>174</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=7719797"/>
          </comment>
          <pub-id pub-id-type="medline">7719797</pub-id>
          <pub-id pub-id-type="pmcid">PMC116194</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Haug</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fiszman</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>MPLUS: a probabilistic medical language understanding system</article-title>
          <year>2002</year>
          <conf-name>Proceedings of the ACL-02 Workshop on Natural Language Processing in the Biomedical Domain</conf-name>
          <conf-date>2002</conf-date>
          <conf-loc>Stroudsburg</conf-loc>
          <pub-id pub-id-type="doi">10.3115/1118149.1118154</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koehler</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <source>SymText: A Natural Language Understanding System for Encoding Free Text Medical Data</source>
          <year>1999</year>
          <publisher-loc>Provo</publisher-loc>
          <publisher-name>University of Utah</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Lang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>An overview of MetaMap: historical perspective and recent advances</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>229</fpage>
          <lpage>236</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&#38;pmid=20442139"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.002733</pub-id>
          <pub-id pub-id-type="medline">20442139</pub-id>
          <pub-id pub-id-type="pii">17/3/229</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Irani</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Wehbe</surname>
              <given-names>FH</given-names>
            </name>
            <name name-style="western">
              <surname>Smithers</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Spickard</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The KnowledgeMap project: development of a concept-based medical school curriculum database</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2003</year>
          <fpage>195</fpage>
          <lpage>199</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14728161"/>
          </comment>
          <pub-id pub-id-type="medline">14728161</pub-id>
          <pub-id pub-id-type="pii">D030003640</pub-id>
          <pub-id pub-id-type="pmcid">PMC1480333</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ogren</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>507</fpage>
          <lpage>513</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&#38;pmid=20819853"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
          <pub-id pub-id-type="medline">20819853</pub-id>
          <pub-id pub-id-type="pii">17/5/507</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>QT</given-names>
            </name>
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sordo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Extracting principal diagnosis, co-morbidity and smoking status for asthma research: evaluation of a natural language processing system</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2006</year>
          <month>7</month>
          <day>26</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>30</fpage>
          <pub-id pub-id-type="doi">10.1186/1472-6947-6-30</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Solti</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cadag</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Extracting medication information from clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>514</fpage>
          <lpage>518</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20819854"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2010.003947</pub-id>
          <pub-id pub-id-type="medline">20819854</pub-id>
          <pub-id pub-id-type="pii">17/5/514</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995677</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Riloff</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hurdle</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>A study of concept extraction across different types of clinical notes</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2015</year>
          <volume>2015</volume>
          <fpage>737</fpage>
          <lpage>746</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26958209"/>
          </comment>
          <pub-id pub-id-type="medline">26958209</pub-id>
          <pub-id pub-id-type="pmcid">PMC4765588</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Recognizing clinical entities in hospital discharge summaries using Structural Support Vector Machines with word representation features</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2013</year>
          <volume>13 Suppl 1</volume>
          <fpage>S1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-13-S1-S1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1472-6947-13-S1-S1</pub-id>
          <pub-id pub-id-type="medline">23566040</pub-id>
          <pub-id pub-id-type="pii">1472-6947-13-S1-S1</pub-id>
          <pub-id pub-id-type="pmcid">PMC3618243</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>DuVall</surname>
              <given-names>SL</given-names>
            </name>
          </person-group>
          <article-title>2010 i2b2/VA challenge on concepts, assertions, and relations in clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>552</fpage>
          <lpage>556</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21685143"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000203</pub-id>
          <pub-id pub-id-type="medline">21685143</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000203</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
          </person-group>
          <article-title>Effective mapping of biomedical text to the UMLS Metathesaurus: the MetaMap program</article-title>
          <source>Proc AMIA Symp</source>
          <year>2001</year>
          <fpage>17</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/11825149"/>
          </comment>
          <pub-id pub-id-type="medline">11825149</pub-id>
          <pub-id pub-id-type="pii">D010001275</pub-id>
          <pub-id pub-id-type="pmcid">PMC2243666</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Bruijn</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cherry</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kiritchenko</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Machine-learned solutions for three stages of clinical information extraction: the state of the art at i2b2 2010</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>557</fpage>
          <lpage>562</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21565856"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000150</pub-id>
          <pub-id pub-id-type="medline">21565856</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000150</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168309</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Rumshisky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Evaluating temporal relations in clinical text: 2012 i2b2 Challenge</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <month>09</month>
          <day>01</day>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>806</fpage>
          <lpage>813</lpage>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001628</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>EI</given-names>
            </name>
          </person-group>
          <article-title>An end-to-end system to identify temporal relation in discharge summaries: 2012 i2b2 challenge</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>849</fpage>
          <lpage>858</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23467472"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2012-001607</pub-id>
          <pub-id pub-id-type="medline">23467472</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2012-001607</pub-id>
          <pub-id pub-id-type="pmcid">PMC3756267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A hybrid system for temporal information extraction from clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>828</fpage>
          <lpage>835</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23571849"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001635</pub-id>
          <pub-id pub-id-type="medline">23571849</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001635</pub-id>
          <pub-id pub-id-type="pmcid">PMC3756274</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wagholikar</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jonnalagadda</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Komandur Elayavilli</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Comprehensive temporal information detection from clinical text: medical events, time, and TLINK identification</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <month>09</month>
          <day>01</day>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>836</fpage>
          <lpage>842</lpage>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001622</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kovačević</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dehghan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Filannino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Combining rules and machine learning for extraction of temporal expressions and events from clinical narratives</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <month>09</month>
          <day>01</day>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>859</fpage>
          <lpage>866</lpage>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001625</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stubbs</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kotfila</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Automated systems for the de-identification of longitudinal clinical narratives: overview of 2014 i2b2/UTHealth shared task Track 1</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58 Suppl</volume>
          <fpage>S11</fpage>
          <lpage>S19</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00117-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.06.007</pub-id>
          <pub-id pub-id-type="medline">26225918</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00117-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC4989908</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Garibaldi</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Automatic detection of protected health information from clinic narratives</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58 Suppl</volume>
          <fpage>S30</fpage>
          <lpage>S38</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00125-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.06.015</pub-id>
          <pub-id pub-id-type="medline">26231070</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00125-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4989090</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Automatic de-identification of electronic medical records using token-level and character-level conditional random fields</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58 Suppl</volume>
          <fpage>S47</fpage>
          <lpage>S52</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00119-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.06.009</pub-id>
          <pub-id pub-id-type="medline">26122526</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00119-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4988843</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>CRFs based de-identification of medical records</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58 Suppl</volume>
          <fpage>S39</fpage>
          <lpage>S46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00179-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.08.012</pub-id>
          <pub-id pub-id-type="medline">26315662</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00179-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC4988860</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dehghan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kovacevic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Karystianis</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Combining knowledge- and data-driven methods for de-identification of clinical narratives</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58 Suppl</volume>
          <fpage>S53</fpage>
          <lpage>S59</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00139-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.06.029</pub-id>
          <pub-id pub-id-type="medline">26210359</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00139-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4976126</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suominen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salanterä</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Overview of the ShARe/CLEF eHealth evaluation lab</article-title>
          <year>2013</year>
          <conf-name>International Conference of the Cross-Language Evaluation Forum for European Languages</conf-name>
          <conf-date>2013</conf-date>
          <conf-loc>Valencia</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-642-40802-1_24</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Manandhar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>SemEval-2014 task 7: analysis of clinical text</source>
          <year>2014</year>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://alt.qcri.org/semeval2014/cdrom/pdf/SemEval2014007.pdf">http://alt.qcri.org/semeval2014/cdrom/pdf/SemEval2014007.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bethard</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Derczynski</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pustejovsky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Verhagen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>SemEval-2015 task 6: clinical TempEval</source>
          <year>2015</year>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://alt.qcri.org/semeval2015/cdrom/pdf/SemEval136.pdf">http://alt.qcri.org/semeval2015/cdrom/pdf/SemEval136.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gorman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Manandhar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>SemEval-2015 task 14: analysis of clinical text</source>
          <year>2015</year>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://alt.qcri.org/semeval2015/cdrom/pdf/SemEval051.pdf">http://alt.qcri.org/semeval2015/cdrom/pdf/SemEval051.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bethard</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Derczynski</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Pustejovsky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Verhagen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>SemEval-2016 task 12: clinical TempEval</source>
          <year>2016</year>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://alt.qcri.org/semeval2016/task12/">http://alt.qcri.org/semeval2016/task12/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenbloom</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Mani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A study of machine-learning-based approaches to extract clinical entities and their assertions from discharge summaries</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>601</fpage>
          <lpage>606</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21508414"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000163</pub-id>
          <pub-id pub-id-type="medline">21508414</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000163</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Clinical named entity recognition from Chinese electronic health records via machine learning methods</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>12</month>
          <day>17</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e50</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2018/4/e50/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.9965</pub-id>
          <pub-id pub-id-type="medline">30559093</pub-id>
          <pub-id pub-id-type="pii">v6i4e50</pub-id>
          <pub-id pub-id-type="pmcid">PMC6315256</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>Adv Neural Info Process Sys</source>
          <year>2013</year>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>1780</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Neumann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Iyyer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Deep contextualized word representations</article-title>
          <source>arXiv preprint</source>
          <year>2018</year>
          <fpage>180205365</fpage>
          <pub-id pub-id-type="doi">10.18653/v1/n18-1202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ammar</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bhagavatula</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Power</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised sequence tagging with bidirectional language models</article-title>
          <source>arXiv preprint</source>
          <year>2017</year>
          <fpage>170500108</fpage>
          <pub-id pub-id-type="doi">10.18653/v1/p17-1161</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Neumann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Dissecting contextual word embeddings: architecture and representation</article-title>
          <source>arXiv preprint</source>
          <year>2018</year>
          <pub-id pub-id-type="doi">10.18653/v1/d18-1179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Akbik</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Blythe</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Vollgraf</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Contextual string embeddings for sequence labeling</source>
          <year>2018</year>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://alanakbik.github.io/papers/coling2018.pdf">https://alanakbik.github.io/papers/coling2018.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A study of neural word embeddings for named entity recognition in clinical text</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2015</year>
          <volume>2015</volume>
          <fpage>1326</fpage>
          <lpage>1333</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26958273"/>
          </comment>
          <pub-id pub-id-type="medline">26958273</pub-id>
          <pub-id pub-id-type="pmcid">PMC4765694</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical named entity recognition using deep learning models</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2017</year>
          <volume>2017</volume>
          <fpage>1812</fpage>
          <lpage>1819</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29854252"/>
          </comment>
          <pub-id pub-id-type="medline">29854252</pub-id>
          <pub-id pub-id-type="pmcid">PMC5977567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Entity recognition from clinical texts via recurrent neural network</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>07</month>
          <day>05</day>
          <volume>17</volume>
          <issue>Suppl 2</issue>
          <fpage>67</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0468-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0468-7</pub-id>
          <pub-id pub-id-type="medline">28699566</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0468-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5506598</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Paschalidis</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Tahmasebi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>arXiv preprint</source>
          <year>2018</year>
          <access-date>2019-10-22</access-date>
          <comment>Clinical concept extraction with contextual word embedding. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1810.10566">https://arxiv.org/abs/1810.10566</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Improving lexical embeddings with semantic knowledge</article-title>
          <year>2014</year>
          <conf-name>Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2)</conf-name>
          <conf-date>2014</conf-date>
          <conf-loc>Baltimore</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/p14-2089</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chopra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Semantic embeddings from hashtags</article-title>
          <year>2014</year>
          <conf-name>Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP)</conf-name>
          <conf-date>2014</conf-date>
          <conf-loc>Doha</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1194</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hogan</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Combine factual medical knowledge and distributed word representation to improve clinical named entity recognition</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2018</year>
          <volume>2018</volume>
          <fpage>1110</fpage>
          <lpage>1117</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30815153"/>
          </comment>
          <pub-id pub-id-type="medline">30815153</pub-id>
          <pub-id pub-id-type="pmcid">PMC6371322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Incorporating dictionaries into deep neural networks for the Chinese clinical named entity recognition</article-title>
          <source>J Biomed Inform</source>
          <year>2019</year>
          <month>04</month>
          <volume>92</volume>
          <fpage>103133</fpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2019.103133</pub-id>
          <pub-id pub-id-type="medline">30818005</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(19)30051-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Learning semantic word embeddings based on ordinal knowledge constraints</article-title>
          <year>2015</year>
          <conf-name>Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1)</conf-name>
          <conf-date>2015</conf-date>
          <conf-loc>Beijing</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/p15-1145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mencia</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>de Melo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nam</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Medical concept embeddings via labeled background corpora</article-title>
          <year>2016</year>
          <conf-name>Proceedings of the 10th Language Resources and Evaluation Conference (LREC)</conf-name>
          <conf-date>2016</conf-date>
          <conf-loc>Portoroz</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mamitsuka</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>DeepMeSH: deep semantic representation for improving large-scale MeSH indexing</article-title>
          <source>Bioinformatics</source>
          <year>2016</year>
          <month>06</month>
          <day>15</day>
          <volume>32</volume>
          <issue>12</issue>
          <fpage>i70</fpage>
          <lpage>i79</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27307646"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btw294</pub-id>
          <pub-id pub-id-type="medline">27307646</pub-id>
          <pub-id pub-id-type="pii">btw294</pub-id>
          <pub-id pub-id-type="pmcid">PMC4908368</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Celikyilmaz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hakkani-Tur</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pasupat</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sarikaya</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Enriching word embeddings using knowledge graph for semantic tagging in conversational dialog systems</source>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aaai.org/ocs/index.php/SSS/SSS15/paper/download/10333/10034">https://www.aaai.org/ocs/index.php/SSS/SSS15/paper/download/10333/10034</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Lexicon infused phrase embeddings for named entity resolution</article-title>
          <source>arXiv preprint</source>
          <year>2014</year>
          <fpage>14045367</fpage>
          <pub-id pub-id-type="doi">10.3115/v1/w14-1609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AEW</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27219127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <source>Word2Vec implementation</source>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/dav/word2vec">https://github.com/dav/word2vec</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Soysal</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <source>UTH-CCB: the participation of the SemEval 2015 challenge—Task 14</source>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://clamp.uth.edu/challenges-publications/UTH-CCB-%20the%20participation%20of%20the%20SemEval%202015%20challenge%E2%80%93Task%2014.pdf">https://clamp.uth.edu/challenges-publications/UTH-CCB-%20the%20participation%20of%20the%20SemEval%202015%20challenge%E2%80%93Task%2014.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kra</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rzhetsky</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Two biomedical sublanguages: a description based on the theories of Zellig Harris</article-title>
          <source>J Biomed Inform</source>
          <year>2002</year>
          <month>08</month>
          <volume>35</volume>
          <issue>4</issue>
          <fpage>222</fpage>
          <lpage>235</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(03)00012-1"/>
          </comment>
          <pub-id pub-id-type="medline">12755517</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(03)00012-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reimers</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Alternative weighting schemes for ELMo embeddings</article-title>
          <source>arXiv preprint</source>
          <year>2019</year>
          <fpage>190402954</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Speer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>An ensemble method to produce high-quality word embeddings</article-title>
          <source>arXiv preprint</source>
          <year>2016</year>
          <fpage>160401692</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Bidirectional LSTM-CRF models for sequence tagging</article-title>
          <source>arXiv preprint</source>
          <year>2015</year>
          <fpage>150801991</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Akbik</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Flair implementation</source>
          <year>2018</year>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/zalandoresearch/flair/graphs/contributors">https://github.com/zalandoresearch/flair/graphs/contributors</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Woolson</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Wilcoxon signed-rank test</article-title>
          <source>Wiley Encyclopedia of Clinical Trials</source>
          <year>2007</year>
          <fpage>1</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1002/9780471462422.eoct979</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jernite</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rush</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <source>Character-aware neural language models</source>
          <access-date>2019-10-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1508.06615">https://arxiv.org/abs/1508.06615</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>LeCun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>05</month>
          <day>27</day>
          <volume>521</volume>
          <issue>7553</issue>
          <fpage>436</fpage>
          <lpage>444</lpage>
          <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Bert: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv preprint</source>
          <year>2018</year>
          <fpage>181004805</fpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
