<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i2e12704</article-id>
      <article-id pub-id-type="pmid">31124461</article-id>
      <article-id pub-id-type="doi">10.2196/12704</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Development of a Consumer Health Vocabulary by Mining Health Forum Texts Based on Word Embedding: Semiautomatic Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Bian</surname>
            <given-names>Jiang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wang</surname>
            <given-names>Yanshan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Vydiswaran</surname>
            <given-names>Vinod</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lin</surname>
            <given-names>Pao-hwa</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="author" id="contrib1">
          <name name-style="western">
            <surname>Gu</surname>
            <given-names>Gen</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3476-2041</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib2">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Xingting</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1190-5422</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib3">
          <name name-style="western">
            <surname>Zhu</surname>
            <given-names>Xingeng</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-4981-8500</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib4">
          <name name-style="western">
            <surname>Jian</surname>
            <given-names>Zhe</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-0439-6087</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib5">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Ken</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-2053-917X</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib6">
          <name name-style="western">
            <surname>Wen</surname>
            <given-names>Dong</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-3549-3311</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib7">
          <name name-style="western">
            <surname>Gao</surname>
            <given-names>Li</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-8858-129X</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib8">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Shaodian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8514-9503</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib9">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Fei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-2212-3947</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib10">
          <name name-style="western">
            <surname>Ma</surname>
            <given-names>Handong</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3945-2990</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib11" corresp="yes">
          <name name-style="western">
            <surname>Lei</surname>
            <given-names>Jianbo</given-names>
          </name>
          <degrees>PhD, MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Center for Medical Informatics</institution>
            <institution>Peking University</institution>
            <addr-line>No. 1 Administration Building, Peking University Health Science Center, Haidian Dist</addr-line>
            <addr-line>Beijing</addr-line>
            <country>China</country>
            <phone>86 (010)82805906</phone>
            <email>jblei@hsc.pku.edu.cn</email>
          </address>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-1744-0235</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Synyi Research</institution>
        <addr-line>Shanghai</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
      <label>2</label>
      <institution>Center for Medical Informatics</institution>
      <institution>Peking University</institution>  
      <addr-line>Beijing</addr-line>
      <country>China</country></aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Harbin Medical University</institution>
        <addr-line>Harbin</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
      <label>4</label>
      <institution>School of Stomatology</institution>
      <institution>Peking University</institution>  
      <addr-line>Beijing</addr-line>
      <country>China</country></aff>
      <aff id="aff5">
      <label>5</label>
      <institution>APEX Data &amp; Knowledge Management Lab</institution>
      <institution>Shanghai Jiao Tong University</institution>  
      <addr-line>Shanghai</addr-line>
      <country>China</country></aff>
      <aff id="aff6">
      <label>6</label>
      <institution>Department of Healthcare Policy and Research</institution>
      <institution>Weill Cornell Medicine</institution>  
      <addr-line>New York, NY</addr-line>
      <country>United States</country></aff>
      <aff id="aff7">
      <label>7</label>
      <institution>School of Medical Informatics and Engineering</institution>
      <institution>Southwest Medical University</institution>  
      <addr-line>Luzhou city, Sichuan Province</addr-line>
      <country>China</country></aff>
      <author-notes>
        <corresp>Corresponding Author: Jianbo Lei 
        <email>jblei@hsc.pku.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection"><season>Apr-Jun</season><year>2019</year></pub-date>
      <pub-date pub-type="epub">
        <day>23</day>
        <month>05</month>
        <year>2019</year>
      </pub-date>
      <volume>7</volume>
      <issue>2</issue>
      <elocation-id>e12704</elocation-id>
      <!--history from ojs - api-xml-->
      <history>
        <date date-type="received">
          <day>6</day>
          <month>11</month>
          <year>2018</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>12</month>
          <year>2018</year>
        </date>
        <date date-type="rev-recd">
          <day>19</day>
          <month>3</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>4</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Gen Gu, Xingting Zhang, Xingeng Zhu, Zhe Jian, Ken Chen, Dong Wen, Li Gao, Shaodian Zhang, Fei Wang, Handong Ma, Jianbo Lei. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 23.05.2019.</copyright-statement>
      <copyright-year>2019</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2019/2/e12704/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The vocabulary gap between consumers and professionals in the medical domain hinders information seeking and communication. Consumer health vocabularies have been developed to aid such informatics applications. This purpose is best served if the vocabulary evolves with consumers’ language.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Our objective is to develop a method for identifying and adding new terms to consumer health vocabularies, so that it can keep up with the constantly evolving medical knowledge and language use.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>In this paper, we propose a consumer health term–finding framework based on a distributed word vector space model. We first learned word vectors from a large-scale text corpus and then adopted a supervised method with existing consumer health vocabularies for learning vector representation of words, which can provide additional supervised fine tuning after unsupervised word embedding learning. With a fine-tuned word vector space, we identified pairs of professional terms and their consumer variants by their semantic distance in the vector space. A subsequent manual review of the extracted and labeled pairs of entities was conducted to validate the results generated by the proposed approach. The results were evaluated using mean reciprocal rank (MRR).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Manual evaluation showed that it is feasible to identify alternative medical concepts by using professional or consumer concepts as queries in the word vector space without fine tuning, but the results are more promising in the final fine-tuned word vector space. The MRR values indicated that on average, a professional or consumer concept is about 14th closest to its counterpart in the word vector space without fine tuning, and the MRR in the final fine-tuned word vector space is 8. Furthermore, the results demonstrate that our method can collect abbreviations and common typos frequently used by consumers.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>By integrating a large amount of text information and existing consumer health vocabularies, our method outperformed several baseline ranking methods and is effective for generating a list of candidate terms for human review during consumer health vocabulary development.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>consumer health vocabulary</kwd>
        <kwd>word embedding</kwd>
        <kwd>representation learning</kwd>
        <kwd>natural language processing</kwd>
        <kwd>consumer health information</kwd>
        <kwd>ontology enrichment</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In 2015, a survey of Chinese internet users showed that medicine and health care are the two most popular searched topics and accounted for 55.15% of all searches [<xref ref-type="bibr" rid="ref1">1</xref>]. However, it is difficult for most users to express medical concepts using professional terms such as bronchus, brain, and extracellular space [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>], and online forums and news media explain such professional medical terms with very little detail. The gap between consumer language and medical terminology makes searching and retrieving information difficult and biases the understanding of health information [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>Development of the consumer health vocabularies, which map languages of consumers and medical experts, is a potential way to bridge the gap. Several commercial and noncommercial groups, such as Apelon and Public Health Terminology by Intelligent Medical Objects, and Open Access and Collaborative Consumer Health Vocabulary [<xref ref-type="bibr" rid="ref8">8</xref>], tried to bind consumer health vocabularies with the Unified Medical Language System (UMLS) or the International Classification of Diseases (ICD). Several factors dominate the expansion of a quality consumer health vocabulary: a comprehensive search to identify related nonstandard expressions, abbreviations and common typos, a consensus between consumers’ point of view and professional classification, and periodic updating for new terms. These factors make the expansion process complicated, costly, and time-consuming.</p>
        <p>To accelerate the expansion process, researchers developed many approaches to extract and map consumer terms automatically or semiautomatically, including the n-gram-based approach [<xref ref-type="bibr" rid="ref9">9</xref>], pattern-based approach [<xref ref-type="bibr" rid="ref7">7</xref>], co-occurrence analysis [<xref ref-type="bibr" rid="ref10">10</xref>], and machine learning methods [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Although the consumer health vocabularies mined through these hand-crafted heuristic approaches are more accurate, many relevant pairs could be missing. Recent theoretical and experimental results from Wang et al [<xref ref-type="bibr" rid="ref12">12</xref>] showed that matching professional-consumer concept pairs through text embedding approaches can capture the semantic similarities between professional concepts and consumer concepts, thus yielding a high recall. However, with only unsupervised algorithms, many irrelevant pairs could be generated. In order to retain the advantage of text embedding and improve precision as much as possible, we propose a semisupervised representation learning method to make the concept embedding specific in the consumer vocabulary mining process. With the knowledge introduced by the reviewer, concept embeddings can continuously improve themselves. Our approach provides a related consumer term list sorted by their semantic distance to a particular medical term and helps reviewers identify synonym pairs efficiently. We extracted consumer health terms from one of the most popular health forums in China and manually evaluated the performance of our approach. The experimental results are promising, showing performance improvement of up to 16% with a small number of seed pairs.</p>
      </sec>
      <sec>
        <title>Synonym Identification</title>
        <p>Two mainstream approaches for identifying synonyms are rule-based algorithm and word similarity measurement. A rule-based algorithm identifies synonyms by semantic patterns. For example, Vydiswaran et al took advantage of common linking phrases such as “also called,” “also known as,” and “also referred to as” to extract synonyms from Wikipedia [<xref ref-type="bibr" rid="ref7">7</xref>]. There are many ways to calculate word similarities, including n-gram, edit-distance (Levenshtein distance), WordNet-distance, and cosine-distance between word vectors [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. Among them, training distributed word vectors and extracting synonyms from top closest words are the most popular ways. Henriksson et al created word vectors using latent semantic analysis with random indexing and permutation to identify medical synonyms and abbreviation-expansion pairs [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. He et al created word vectors including linguistic, contextual, and statistical features and used K-means to gather new consumer health terms on social media [<xref ref-type="bibr" rid="ref19">19</xref>]. Elhadad et al created word vectors combining both contextual and semantic features and cluster terms from breast cancer forums into predefined semantic categories [<xref ref-type="bibr" rid="ref20">20</xref>]. Wang et al created word vectors using word2vec, an open-source natural language processing tool released by Google, to extract symptoms from UMLS [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
      </sec>
      <sec>
        <title>Development of Consumer Health Vocabularies</title>
        <p>As early as 1998, Marshall [<xref ref-type="bibr" rid="ref21">21</xref>] mapped the consumer health terminology from WellMed (a health care website) to SNOMED (Systematized Nomenclature of Medicine) and UMLS, which helped patients search information using nonprofessional expressions. In 2001, Patrick expanded UMLS, the Eurodicautom of the European Commission’s Translation Service, and the European Commission Glossary of popular and technical medical terms, by adding words from the Dictionary of American Regional English, but only focused on diabetes-related terms [<xref ref-type="bibr" rid="ref22">22</xref>]. Both Marshall and Patrick constructed their consumer health vocabularies manually, which is inefficient and unscalable. In 2005, Zeng developed a two-step approach, which combined corpus-based text analysis and manual review, to build an open-source consumer health vocabulary [<xref ref-type="bibr" rid="ref23">23</xref>]. To reduce the labor in term mapping, Zeng improved the two-step approach by adding n-gram, logistic regression, and even natural language processing and machine learning algorithm (parts of speech, noun phrases, and named entities recognition) [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Since then, the two-step semiautomatic approach—term identification algorithms followed by manual review—evolved into a common practice in many consumer health vocabulary researches [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref24">24</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>To alleviate the problems mentioned above, we propose a consumer health term–finding framework based on a distributed word vector space model. The overview of the framework is shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>. The workflow can be interpreted as a feed-forward neural network. The first step of our approach requires a corpus of raw text for the unsupervised pretraining of the embedding matrix <italic>E</italic> as the embedding layer (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Text embedding approaches have proven to be very effective in capturing the similarities between words and phrases, which can yield professional-consumer concept pairs that cannot be found by feature- and pattern-based methods.</p>
        <p>We used THULAC (Tsinghua University - Lexical Analyzer for Chinese) [<xref ref-type="bibr" rid="ref25">25</xref>], a Chinese segmentation tool, to change Chinese text into words. Thereafter, word embedding tools were used to compute the vector space, as described above. We collected professional concepts from the official Chinese version of ICD-10, calculated the frequency of words in the corpus, and extracted those with a count of over 1000 and their corresponding consumer concepts with context as seed pairs. All the weights were initialized uniformly at random. In order to obtain a model that takes professional concepts as input and consumer concept list as output after training, we introduced an embedding space–adapting process consisting of an embedding projection and a supervised ranking method. The embedding projection contains a projection layer, a hidden layer, and a target projection layer (<xref ref-type="fig" rid="figure1">Figure 1</xref>) to achieve a “smaller” embedding space that preserves more supervisory signal.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>The overall architecture of our model. The consumer and professional terms start from both ends of the model, going through some embedding and projection layers, so that they are projected into a unified semantic space, where the ranking loss will be measured.</p>
            </caption>
            <graphic xlink:href="medinform_v7i2e12704_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        <p>The output concept list is not necessarily the most similar word of the input concept in the word vector space, but with the supervised signal of seed terms, their similarity is more of a measure of professional-consumer concept pair similarity. A special ranking loss function is used in the ranking loss layer to calculate the similarity of professional-consumer concept pairs. After manually selecting the output professional-consumer concept pairs, we input the selection results into the training data. New professional-consumer concept pairs were discovered through iterations.</p>
      </sec>
      <sec>
        <title>Word Embedding</title>
        <p>Word embeddings are generally trained to reconstruct linguistic contexts of words by optimizing an objective function that can be measured without annotations. One popular approach is to estimate the embeddings by maximizing the probability that the words within a given window size are predicted correctly. Word embedding takes a large corpus of text as its input and produces a vector space, with each unique word in the corpus assigned a corresponding vector in the space. In word embedding training, one of the key issues is the formulation of the training objective function, minimization or maximization of which may produce meaningful word vector representations. Ideally, the training objective function should reflect the fact that the semantic word similarities measured on learned word vectors are consistent with human cognition. Recently Wang et al [<xref ref-type="bibr" rid="ref26">26</xref>] performed a comprehensive comparative study on the different word embedding techniques in biomedical texts. Of those, we chose three popular word embedding methods: Word2Vec [<xref ref-type="bibr" rid="ref27">27</xref>], Global Vectors (GloVe) [<xref ref-type="bibr" rid="ref28">28</xref>], and FastText [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <sec>
          <title>Word2Vec</title>
          <p>Word2Vec is a widely used method in natural language processing for generating word embeddings. It has two different training strategies: (1) Continuous Bag-of-Words, in which the model is given a sequence of words without the middle one and attempts to predict this omitted word, and (2) Skip-Gram, in which the model is given a word and attempts to predict its neighboring words. In both cases, the model consists of only a single weight matrix (apart from the word embedding), which results in a fast log-linear training process that can capture semantic information [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        </sec>
        <sec>
          <title>Global Vectors</title>
          <p>The GloVe method was proposed by Pennington et al [<xref ref-type="bibr" rid="ref28">28</xref>] and obtained state-of-the-art results for syntactic and semantic analogy tasks. This method has a co-occurrence matrix <italic>M</italic> that is constructed by looking at the context words. Each element <italic>M</italic><sub>ij</sub> in the matrix represents the probability of the word <italic>i</italic> being similar to the word <italic>j</italic>. In the matrix <italic>M</italic>, the vectors are randomly generated and trained with the equation <italic>P</italic> (<italic>w</italic><sub>i</sub>, <italic>w</italic><sub>j</sub>)=<italic>log</italic> (<italic>M</italic><sub>ij</sub>)=<italic>w</italic><sub>i</sub><italic>w</italic><sub>j</sub>+<italic>b</italic><sub>i</sub>+<italic>b</italic><sub>j</sub>, where <italic>w</italic><sub>i</sub> and <italic>w</italic><sub>j</sub> are word vectors and <italic>b</italic><sub>i</sub> and <italic>b</italic><sub>j</sub> are biases.</p>
        </sec>
        <sec>
          <title>FastText</title>
          <p>FastText is a recently developed method [<xref ref-type="bibr" rid="ref29">29</xref>] proposed by the same group who developed word2vec, in which the embeddings are associated with character n-grams and the words are represented as the summation of these representations. Specifically, a word representation is induced by summing character n-gram vectors with vectors of surrounding words. Therefore, this method attempts to capture morphological information to induce word embedding.</p>
        </sec>
      </sec>
      <sec>
        <title>Adapting Embedding With Supervised Training</title>
        <p>As mentioned in the Introduction, word embedding is a useful unsupervised technique to capture the similarity between words and phrases, which can yield high recall of professional-consumer concept pairs. It can also be used as a pretraining phase prior to supervised training. However, even if the embeddings provide compact, real-valued representations of each word in a vocabulary, it only indicates that word embeddings produce a semantic space that models synonymy to a certain degree. Current methods use pretrained embedding to initialize model parameters and then use the labeled data to guide them for the intended task (eg, we use professional-consumer concept pairs that already exist as the supervision to produce a semantic space dedicated to finding such pairs). If, as in our case, only a small amount of supervised data are available, this can lead to severe overfitting. Furthermore, rare words will receive very few updates and their embedding will be poorly adapted for our task. We propose two solutions to avoid these problems.</p>
        <sec>
          <title>Embedding Projection</title>
          <p>Let <italic>E</italic> denote the original embedding matrix obtained. We define the adapted embedding matrix as the multiplication <italic>S</italic> • <italic>E</italic>, where <italic>S</italic> is the projection matrix and <italic>s</italic>&lt; <italic>e</italic>. We estimate the parameters of the matrix <italic>S</italic> using the labeled dataset, while <italic>E</italic> is kept fixed. In other words, we determine the optimal projection of the embedding matrix <italic>E</italic> into a subspace. The ideal embedding subspace relies on two fundamental principles:</p>
          <list list-type="order">
            <list-item>
              <p>With dimensionality reduction of the embedding, the model can better fit the complexity of our consumer health vocabularies task or the amount of available data. As the number of professional-consumer concept pairs increases, the size of the embedding can be adjusted.</p>
            </list-item>
            <list-item>
              <p>Using a projection, all embeddings are indirectly updated, not only for the words present in the labeled dataset.</p>
            </list-item>
          </list>
          <p>Let <italic>M</italic>=[<italic>w</italic><sub>1</sub>… <italic>w</italic><sub>n</sub>] denote a message of n words. Each column <italic>w</italic>∈{0,1}<sup>v×1</sup> of <italic>M</italic> represents a word in one-hot form. <italic>P</italic> is the projection vector for each word, given by <italic>P</italic>= <italic>S</italic> • <italic>E</italic> • <italic>M</italic>. A simple adapting rule is to keep the original <italic>S</italic> fixed and append a new random initial matrix to <italic>S</italic> to obtain the new <italic>S’</italic> for retraining.</p>
          <p>Compared to a conventional feed-forward network employing embedding for natural language, two main differences arise. First, the input layer is factorized into two components—the embedding attained in unsupervised form <italic>E</italic> and the projection matrix <italic>S</italic>. Second, the size of the subspace in which the embeddings are projected is much smaller than that of the original embedding with typical reductions above one order of magnitude. As is usual in this kind of model, all the parameters can be trained with gradient methods, using the back-propagation update rule.</p>
        </sec>
        <sec>
        <title>Supervised Ranking Method</title>
        <p>One of the challenges for supervised word embedding training is the difficulty of defining the exact similarity values between two words. Especially in our case, the professional concept and the consumer concept are different. The similarity measure is affected by many factors such as the dimensionality of the embedding, the employed learning algorithms, and the corpus size. Although the similarity values are quite different, the ranking of similarity values is more robust than the values themselves.</p>
        <p>Inspired by this finding, we employed ranking information as the supervised training targets. The ranking loss function <inline-graphic xlink:href="medinform_v7i2e12704_fig2.png" mimetype="image" xlink:type="simple"/>is obtained as,</p>  
        <p><graphic xlink:href="medinform_v7i2e12704_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/></p>
        <p>where <italic>V</italic> is the vocabulary, <italic>ω</italic><sub>v</sub> is a specific word, and <graphic xlink:href="medinform_v7i2e12704_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>is the set of synonym words of <italic>ω</italic><sub>v</sub> in the labeled set. <graphic xlink:href="medinform_v7i2e12704_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>is the rank of <italic>ω</italic><sub>r</sub> in the labeled set, and <graphic xlink:href="medinform_v7i2e12704_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>is the rank of <italic>ω</italic><sub>r</sub> according to its cosine similarity with <italic>ω</italic><sub>v</sub> measured in the embedding space.</p>
        <p>Because the ranking loss is not differentiable, we choose to minimize the semantic similarity loss between the desired ranking position and the real ranking position in the embedding space as a surrogate. Given the desired ranking position, the similarity value corresponding to the desired ranking position is employed as the real training target. Minimizing the difference of similarity values between the desired position and the real position may also reduce the ranking loss. The similarity value lies in function <inline-graphic xlink:href="medinform_v7i2e12704_fig7.png" mimetype="image" xlink:type="simple"/>, given below, where <graphic xlink:href="medinform_v7i2e12704_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>denotes the sorted similarity values for word <italic>ω</italic><sub>v</sub>:</p>
        <p><graphic xlink:href="medinform_v7i2e12704_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/></p></sec>
      </sec>
      <sec>
        <title>Experiment</title>
        <p>To evaluate the effectiveness of the proposed model, three groups of experiments were designed. The three kinds of word embeddings with different vector size were further trained by the proposed model and evaluated. The baselines were the original word embeddings described above. The effect of the projection layer was studied in the second group of experiments. Two comparison groups were involved, one that used the standard structure without the projection layer and another that used the proposed projection layer.</p>
      </sec>
      <sec>
        <title>Data Sets</title>
        <p>We tested our methods with the corpus obtained from two different Chinese communities to cover different perspectives. The Tianya community is one of the most popular online forums in China, and the data are open and easy to retrieve. The health care sector of the Tianya community—Tianya Hospital—has a large number of disease consulting posts initiated by consumers, and about 180 Mb of data are used in our experiment. Haodf is the largest Chinese medical question-and-answer website where all questions are created by patients and answered by doctors, and about 2 Gb of data are used in our experiment. We considered these to be ideal sources of consumer health corpus, used Scrapy [<xref ref-type="bibr" rid="ref30">30</xref>] for a full-text crawling from those corpora, and removed user information before further processing. The messages from the consumer forums were preprocessed as follows: URLs were replaced with a token URL and words occurring less than 30 times in the corpus were replaced by a special UNKNOW symbol. We collected professional concepts from the Chinese official version of ICD-10, calculated the frequency of words in these two corpora, extracted them with a count of over 1000, and manually collected their corresponding consumer concepts with context as seed pairs. The annotation process was performed by one medical professional and reviewed by three medical professionals. We finally obtained 224 seed pairs that all three reviewers consistently agreed upon.</p>
      </sec>
      <sec>
        <title>Evaluation Method</title>
        <p>We use the mean reciprocal rank to evaluate the quality of word embeddings. Mean reciprocal rank is a statistical measure for evaluating any process that produces a list of possible responses to a sample of queries and orders them by probability of correctness. The mean reciprocal rank is the average of the reciprocal ranks of results for a sample of queries Q: <graphic xlink:href="medinform_v7i2e12704_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>where <italic>rank</italic><sub>i</sub> refers to the rank position of the first relevant synonym for the <italic>i</italic>-th query.</p>
        <p>We use a large collection of candidate medical concepts and build a small set of ground truth professional-consumer concept pairs. We randomly select 100 pairs from our seed pairs for evaluation.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Principal Findings</title>
        <p>In general, the performance of the proposed method is detailed in <xref ref-type="table" rid="table1">Table 1</xref>. All word embeddings are significantly enhanced after fine tuning. The performance of the best word embedding is FastText, with a 400-dimensional vector with Haodf and projection matrix size set to 40, and it is also significantly improved in all datasets. The rich n-gram features used in FastText are important in Chinese synonym finding and have much higher performance than others. These remarkable improvements demonstrate that our method may transfer the complementary knowledge from the weak embeddings into the strong embeddings.</p>
      </sec>
      <sec>
        <title>Effect of the Projection Layer</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the system performance with no projection matrix and different projection matrix size. As baselines, we considered a simple log-linear approach, which uses the unsupervised embeddings directly as features in a log-linear classifier. We tested the model performance. Furthermore, we observed that updating the embeddings always led to inferior results. This suggests that pretrained embeddings should be kept fixed, when little labeled data are available to retrain them.</p>
      </sec>
      <sec>
        <title>Manual Review of the Recommended Consumer Health Terms</title>
        <p>In order to ensure the accuracy of professional-consumer concept pairs, manual review is inevitable. <xref ref-type="table" rid="table3">Table 3</xref> shows the top 10 candidates for the word “diarrhea” (“腹泻”) provided by our method for reviewers. Most words illustrated here are symptoms or clinical findings related to “diarrhea,” such as “vomiting” (“呕吐”), “abdominal pain” (“腹痛”), and “dyspepsia” (“消化不良”). We see two synonyms in the table: “having loose bowels” (“拉肚子”, ranked third) and “diarrhea” (“腹泄”, ranked seventh). The former is a consumer health term that is rarely used by professionals, and the latter is a typo of “diarrhea” (“腹泻”). In the manual review, researchers reviewed a sample of the candidate terms suggested by the system to assess whether these terms should be added into the consumer health vocabularies.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Performance of three word embedding methods with different embedding sizes. Italicized values indicate the best performance of the data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="160"/>
            <col width="90"/>
            <col width="90"/>
            <col width="90"/>
            <col width="90"/>
            <col width="90"/>
            <col width="90"/>
            <col width="90"/>
            <col width="90"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td rowspan="2" colspan="2">Corpus and tuning state</td>
                <td colspan="3">GloVe<sup>a</sup></td>
                <td colspan="3">Word2Vec</td>
                <td colspan="3">FastText</td>
              </tr>
              <tr valign="top">
                <td>100<sup>b</sup></td>
                <td>200</td>
                <td>400</td>
                <td>100</td>
                <td>200</td>
                <td>400</td>
                <td>100</td>
                <td>200</td>
                <td>400</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="11"><bold>Tianya</bold></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Before</td>
                <td>0.266</td>
                <td>0.263</td>
                <td>0.282</td>
                <td>0.289</td>
                <td>0.296</td>
                <td>0.272</td>
                <td>0.341</td>
                <td>0.340</td>
                <td>0.319</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>After</td>
                <td>0.313</td>
                <td>0.308</td>
                <td>0.320</td>
                <td>0.325</td>
                <td>0.338</td>
                <td>0.341</td>
                <td>0.355</td>
                <td>0.362</td>
                <td><italic>0.371</italic></td>
              </tr>
              <tr valign="top">
                <td colspan="11"><bold>Haodf</bold></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Before</td>
                <td>0.270</td>
                <td>0.273</td>
                <td>0.289</td>
                <td>0.288</td>
                <td>0.290</td>
                <td>0.295</td>
                <td>0.320</td>
                <td>0.322</td>
                <td>0.331</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>After</td>
                <td>0.321</td>
                <td>0.326</td>
                <td>0.332</td>
                <td>0.313</td>
                <td>0.344</td>
                <td>0.346</td>
                <td>0.361</td>
                <td>0.365</td>
                <td><italic>0.385</italic></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>GloVe: Global Vectors.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Values in this row indicate embedding size.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Performance of FastText-200 with different sizes of the projection matrix.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td rowspan="2">Corpus</td>
                <td colspan="5">Projection matrix size</td>
              </tr>
              <tr valign="top">
                <td>0<sup>a</sup></td>
                <td>20</td>
                <td>40</td>
                <td>80</td>
                <td>160</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Tianya</td>
                <td>0.350</td>
                <td>0.352</td>
                <td>0.362</td>
                <td>0.357</td>
                <td>0.345</td>
              </tr>
              <tr valign="top">
                <td>Haodf</td>
                <td>0.338</td>
                <td>0.342</td>
                <td>0.365</td>
                <td>0.360</td>
                <td>0.331</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Projection matrix size 0 is used to denote the baseline (log-linear model).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Top 10 candidates for the seed word “diarrhea.”</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="400"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Rank</td>
                <td>Medical words in Chinese</td>
                <td>Medical words in English</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>呕吐</td>
                <td>Vomiting</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>腹胀</td>
                <td>Ventosity</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>拉肚子</td>
                <td>Having loose bowels</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>腹痛</td>
                <td>Abdominal pain</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>便秘</td>
                <td>Constipation</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>消化不良</td>
                <td>Dyspepsia</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>腹泄</td>
                <td>Diarrhea</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>厌食</td>
                <td>Anorexia</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>肠鸣</td>
                <td>Borborygmus</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>返酸</td>
                <td>Acid reflux</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>Bridging the language gap between consumers and medical professionals is a fundamental problem in medical internet research. There has been some research on building the consumer health vocabulary for English medical terms, but the research on other languages is scarce. The model developed in this paper was evaluated using Chinese terms and could help professionals collect consumer health vocabularies related to certain clinical topics and discover synonyms in a more effective and efficient way.</p>
      <p>From the methodology perspective, we adopted unsupervised word embedding as the backbone of our approach. This mechanism encodes words into vectors based on the context they are likely to be put into and projects them into a common semantic space. We further fine-tuned the word embeddings to make them align with the limited supervision information provided. A previous study used word vectors trained on a large-scale corpus to explore semantic relationships such as analogy, subordination, and comparison [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. In our corpus, the context of a diagnostic term could always be related diseases, symptoms, and drugs. Therefore, the embeddings of similar terms or synonyms with similar context will be close to each other in the space after the training process.</p>
      <p>Our algorithm can correctly identify over 80% of the synonyms by just searching from the top 10 candidates of a certain medical term. We further summarize these synonyms into three classes: (1) Colloquial expressions; for example, consumers say “having loose bowels” (“拉肚子”) rather than “diarrhea” (“腹泻”) and “zits” (“青春痘”) rather than “acne” (“粉刺”). (2) Typos; for example, consumers always misspell “腹泻” (“diarrhea”) as “腹泄” and “黄疸” (“jaundice”) as “黄胆.” (3) The symptoms or findings from traditional Chinese medicine; for example, Chinese medicine refers to “stomachache” (“胃痛”) as “epigastric pain” (“胃脘痛”). Besides the “typo” synonyms, the other two classes of synonyms do not necessarily share common characters with each other or source terms. Therefore, simple character-based matching approaches such as n-gram and edit-distance do not help in these cases. Semantic pattern-based algorithms depend less on exact common characters; however, consumers in online communities and social media express themselves in a more casual way, and we may not be able to create and maintain a comprehensive semantic pattern list to capture all the variations and diversities. Our method can fill in such a language gap and effectively expand the synonyms and consumer health vocabularies. We validated the effectiveness of our approach with the 180-Mb Tianya corpus and the 2-Gb Haodf corpus. The results indicate that the larger the corpus, the better the learning.</p>
      <p>One limitation of our approach is that we cannot handle a case when a new professional term is needed in the vocabulary, especially the newly formulated professional term, for example, the extracellular space and the interstitial system [<xref ref-type="bibr" rid="ref31">31</xref>]. This is because we adopted a matching-based framework. However, it is not difficult to extend the current algorithm to gain such capability. For example, we can normalize the similarity between a specific consumer term to all professional terms in the dictionary and thus make these similarities a probability distribution. Thereafter, we can use entropy for this distribution to determine whether we need a new professional term. A high entropy indicates that the consumer term is not really similar to any of the existing professional terms, and thus, a new term may be needed.</p>
      <p>For the first time in the Chinese medical terminology field, this study verified the effectiveness of word semantic representations and their potential for linking narrative consumer terms to clinical terms. This approach can discover consumer expressions such as spelling errors and nonstandard abbreviations, which are usually missed in the traditional consumer health vocabularies, and enrich the consumer health vocabularies to meet consumer requirements for information retrieval. The candidate consumer term list automatically generated by our model can be employed as an important reference for professionals to discover synonyms in a more efficient way.</p>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">GloVe</term>
          <def>
            <p>Global Vectors</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">MRR</term>
          <def>
            <p>mean reciprocal rank</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">SNOMED</term>
          <def>
            <p>Systematized Nomenclature of Medicine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">THULAC</term>
          <def>
            <p>Tsinghua University - Lexical Analyzer for Chinese</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>All coauthors contributed equally. This study is partly sponsored by the National Natural Science Foundation of China (Grants: #81471756 #81771937). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <collab>China Institute of Popular Science</collab>
        </person-group>
        <article-title>An Analysis Report on the Search Demand Behavior of Chinese Netizens in Popular Science in 2015</article-title>
        <source>Science and Technology Review</source>  
        <year>2016</year>  
        <volume>34</volume>  
        <issue>12</issue>  
        <fpage>29</fpage>  
        <lpage>33</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.kjdb.org/CN/Y2016/V34/I12/29"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>QT</given-names>
          </name>
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Exploring and developing consumer health vocabularies</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2006</year>  
        <volume>13</volume>  
        <issue>1</issue>  
        <fpage>24</fpage>  
        <lpage>9</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&amp;pmid=16221948"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M1761</pub-id>
        <pub-id pub-id-type="medline">16221948</pub-id>
        <pub-id pub-id-type="pii">M1761</pub-id>
        <pub-id pub-id-type="pmcid">PMC1380193</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Holtz</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Vasold</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Cotten</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Mackert</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Health Care Provider Perceptions of Consumer-Grade Devices and Apps for Tracking Health: A Pilot Study</article-title>
        <source>JMIR Mhealth Uhealth</source>  
        <year>2019</year>  
        <month>01</month>  
        <day>22</day>  
        <volume>7</volume>  
        <issue>1</issue>  
        <fpage>e9929</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2019/1/e9929"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/mhealth.9929</pub-id>
        <pub-id pub-id-type="medline">30668515</pub-id>
        <pub-id pub-id-type="pii">v7i1e9929</pub-id>
        <pub-id pub-id-type="pmcid">PMC6362391</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lei</surname>
            <given-names>Yiming</given-names>
          </name>
          <name name-style="western">
            <surname>Han</surname>
            <given-names>Hongbin</given-names>
          </name>
          <name name-style="western">
            <surname>Yuan</surname>
            <given-names>Fan</given-names>
          </name>
          <name name-style="western">
            <surname>Javeed</surname>
            <given-names>Aqeel</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Yong</given-names>
          </name>
        </person-group>
        <article-title>The brain interstitial system: Anatomy, modeling, in vivo measurement, and applications</article-title>
        <source>Prog Neurobiol</source>  
        <year>2017</year>  
        <month>10</month>  
        <volume>157</volume>  
        <fpage>230</fpage>  
        <lpage>246</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0301-0082(15)30069-1"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.pneurobio.2015.12.007</pub-id>
        <pub-id pub-id-type="medline">26837044</pub-id>
        <pub-id pub-id-type="pii">S0301-0082(15)30069-1</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yangarber</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Grishman</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Unsupervised learning of generalized names</article-title>
        <source>COLING</source>  
        <year>2002</year>  
        <conf-name>Proceedings of the 19th international conference on Computational linguistics</conf-name>
        <conf-date>2002</conf-date>
        <conf-loc>Taipei, Taiwan</conf-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <pub-id pub-id-type="doi">10.3115/1072228.1072382</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Slaughter</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Ruland</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Rotegård</surname>
            <given-names>AK</given-names>
          </name>
        </person-group>
        <article-title>Mapping cancer patients' symptoms to UMLS concepts</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2005</year>  
        <fpage>699</fpage>  
        <lpage>703</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/16779130"/>
        </comment>  
        <pub-id pub-id-type="medline">16779130</pub-id>
        <pub-id pub-id-type="pii">58504</pub-id>
        <pub-id pub-id-type="pmcid">PMC1560789</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Vydiswaran</surname>
            <given-names>VGV</given-names>
          </name>
          <name name-style="western">
            <surname>Mei</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Hanauer</surname>
            <given-names>DA</given-names>
          </name>
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Mining consumer health vocabulary from community-generated text</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2014</year>  
        <volume>2014</volume>  
        <fpage>1150</fpage>  
        <lpage>9</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25954426"/>
        </comment>  
        <pub-id pub-id-type="medline">25954426</pub-id>
        <pub-id pub-id-type="pmcid">PMC4419967</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Keselman</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Divita</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Browne</surname>
            <given-names>AC</given-names>
          </name>
          <name name-style="western">
            <surname>Leroy</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>Consumer health concepts that do not map to the UMLS: where do they fit?</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2008</year>  
        <volume>15</volume>  
        <issue>4</issue>  
        <fpage>496</fpage>  
        <lpage>505</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&amp;pmid=18436906"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M2599</pub-id>
        <pub-id pub-id-type="medline">18436906</pub-id>
        <pub-id pub-id-type="pii">M2599</pub-id>
        <pub-id pub-id-type="pmcid">PMC2442253</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Doing-Harris</surname>
            <given-names>KM</given-names>
          </name>
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>Computer-assisted update of a consumer health vocabulary through mining of social network data</article-title>
        <source>J Med Internet Res</source>  
        <year>2011</year>  
        <month>05</month>  
        <day>17</day>  
        <volume>13</volume>  
        <issue>2</issue>  
        <fpage>e37</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2011/2/e37/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.1636</pub-id>
        <pub-id pub-id-type="medline">21586386</pub-id>
        <pub-id pub-id-type="pii">v13i2e37</pub-id>
        <pub-id pub-id-type="pmcid">PMC3221384</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Using Co-occurrence Analysis to Expand Consumer Health Vocabularies from Social Media Data</article-title>
        <year>2013</year>  
        <month>09</month>  
        <day>11</day>  
        <conf-name>2013 IEEE International Conference on Healthcare Informatics</conf-name>
        <conf-date>2013</conf-date>
        <conf-loc>Philadelphia, PA</conf-loc>
        <fpage>74</fpage>  
        <lpage>81</lpage>  
        <pub-id pub-id-type="doi">10.1109/ICHI.2013.16</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>QT</given-names>
          </name>
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Divita</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Keselman</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Crowell</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Browne</surname>
            <given-names>AC</given-names>
          </name>
          <name name-style="western">
            <surname>Goryachev</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ngo</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Term identification methods for consumer health vocabulary development</article-title>
        <source>J Med Internet Res</source>  
        <year>2007</year>  
        <month>02</month>  
        <day>28</day>  
        <volume>9</volume>  
        <issue>1</issue>  
        <fpage>e4</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2007/1/e4/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.9.1.e4</pub-id>
        <pub-id pub-id-type="medline">17478413</pub-id>
        <pub-id pub-id-type="pii">v9i1e4</pub-id>
        <pub-id pub-id-type="pmcid">PMC1874512</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Tang</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Vydiswaran</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Mei</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>Matching Consumer Health Vocabulary with Professional Medical Terms Through Concept Embedding</article-title>
        <year>2017</year>  
        <conf-name>AMIA 2017 Annual Symposium</conf-name>
        <conf-date>2017</conf-date>
        <conf-loc>Washington, DC</conf-loc>
        <publisher-name>American Medical Informatics Association</publisher-name></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jelinek</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Mercer</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Roukos</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Classifying words for improved statistical language models</article-title>
        <year>1990</year>  
        <conf-name>International Conference on Acoustics, Speech, and Signal Processing</conf-name>
        <conf-date>1990</conf-date>
        <conf-loc>Albuquerque, NM</conf-loc>
        <publisher-name>IEEE</publisher-name>
        <fpage>621</fpage>  
        <lpage>4</lpage>  
        <pub-id pub-id-type="doi">10.1109/ICASSP.1990.115789</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Blondel</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Gajardo</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Heymans</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Senellart</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Van Dooren</surname>
            <given-names>PM</given-names>
          </name>
        </person-group>
        <article-title>A measure of similarity between graph vertices: Applications to synonym extraction and web searching</article-title>
        <source>SIAM Review</source>  
        <year>2004</year>  
        <volume>46</volume>  
        <issue>4</issue>  
        <fpage>647</fpage>  
        <lpage>666</lpage>  
        <pub-id pub-id-type="doi">10.2307/20453570</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Martino</surname>
            <given-names>Bd</given-names>
          </name>
        </person-group>
        <article-title>Semantic web services discovery based on structural ontology matching</article-title>
        <source>IJWGS</source>  
        <year>2009</year>  
        <volume>5</volume>  
        <issue>1</issue>  
        <fpage>46</fpage>  
        <lpage>65</lpage>  
        <pub-id pub-id-type="doi">10.1504/IJWGS.2009.023868</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hill</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Cho</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Jean</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Devin</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Bengio</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <source>arXiv</source>  
        <year>2014</year>  
        <access-date>2019-05-13</access-date>
        <comment>Embedding word similarity with neural machine translation 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1412.6448">https://arxiv.org/abs/1412.6448</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78LAJpW1Q"/></comment> </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Henriksson</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Conway</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Duneld</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <article-title>Identifying synonymy between SNOMED clinical terms of varying length using distributional analysis of electronic health records</article-title>
        <year>2013</year>  
        <conf-name>AMIA Annual Symposium Proceedings</conf-name>
        <conf-date>2013</conf-date>
        <conf-loc>Washington, DC</conf-loc></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Henriksson</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Moen</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Skeppstedt</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Daudaravičius</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Duneld</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Synonym extraction and abbreviation expansion with ensembles of semantic spaces</article-title>
        <source>J Biomed Semantics</source>  
        <year>2014</year>  
        <month>02</month>  
        <day>05</day>  
        <volume>5</volume>  
        <issue>1</issue>  
        <fpage>6</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/2041-1480-5-6"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/2041-1480-5-6</pub-id>
        <pub-id pub-id-type="medline">24499679</pub-id>
        <pub-id pub-id-type="pii">2041-1480-5-6</pub-id>
        <pub-id pub-id-type="pmcid">PMC3937097</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>He</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Oh</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hou</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Bian</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Enriching consumer health vocabulary through mining a social Q&amp;A site: A similarity-based approach</article-title>
        <source>J Biomed Inform</source>  
        <year>2017</year>  
        <month>12</month>  
        <volume>69</volume>  
        <fpage>75</fpage>  
        <lpage>85</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30065-5"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2017.03.016</pub-id>
        <pub-id pub-id-type="medline">28359728</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(17)30065-5</pub-id>
        <pub-id pub-id-type="pmcid">PMC5488691</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Elhadad</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Driscoll</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Brody</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Characterizing the sublanguage of online breast cancer forums for medications, symptoms, and emotions</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2014</year>  
        <volume>2014</volume>  
        <fpage>516</fpage>  
        <lpage>25</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25954356"/>
        </comment>  
        <pub-id pub-id-type="medline">25954356</pub-id>
        <pub-id pub-id-type="pmcid">PMC4419934</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Marshall</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Bridging the terminology gap between health care professionals and patients with the Consumer Health Terminology (CHT)</article-title>
        <year>2000</year>  
        <conf-name>Proceedings of the AMIA Symposium</conf-name>
        <conf-date>2000</conf-date>
        <conf-loc>Los Angeles, CA</conf-loc></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Patrick</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Monga</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Sievert</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Houston Hall</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Longo</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Evaluation of controlled vocabulary resources for development of a consumer entry vocabulary for diabetes</article-title>
        <source>J Med Internet Res</source>  
        <year>2001</year>  
        <volume>3</volume>  
        <issue>3</issue>  
        <fpage>E24</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2001/3/e24/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.3.3.e24</pub-id>
        <pub-id pub-id-type="medline">11720966</pub-id>
        <pub-id pub-id-type="pmcid">PMC1761907</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Crowell</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Divita</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Roth</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Browne</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Identifying consumer-friendly display (CFD) names for health concepts</article-title>
        <year>2005</year>  
        <conf-name>AMIA annual symposium proceedings</conf-name>
        <conf-date>2005</conf-date>
        <conf-loc>Washington, DC</conf-loc></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Discovering Consumer Health Expressions from Consumer-Contributed Content</article-title>
        <source>Social Computing, Behavioral-Cultural Modeling and Prediction</source>  
        <year>2013</year>  
        <publisher-loc>Berlin, Heidelberg</publisher-loc>
        <publisher-name>Springer</publisher-name>
        <fpage>164</fpage>  
        <lpage>74</lpage> </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Maosong</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Xinxiong</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Kaixu</given-names>
          </name>
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Zhipeng</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Zhiyuan</given-names>
          </name>
        </person-group>
        <source>THULAC: An efficient lexical analyzer for Chinese [in Chinese]</source>  
        <year>2016</year>  
        <access-date>2019-05-11</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://thulac.thunlp.org/">http://thulac.thunlp.org/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78HtNhzB9"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Afzal</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Rastegar-Mojarad</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Kingsbury</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>A comparison of word embeddings for the biomedical natural language processing</article-title>
        <source>J Biomed Inform</source>  
        <year>2018</year>  
        <month>11</month>  
        <volume>87</volume>  
        <fpage>12</fpage>  
        <lpage>20</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2018.09.008</pub-id>
        <pub-id pub-id-type="medline">30217670</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(18)30182-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mikolov</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Corrado</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Dean</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>arXiv</source>  
        <year>2013</year>  
        <access-date>2019-05-13</access-date>
        <comment>Efficient estimation of word representations in vector space 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1301.3781">https://arxiv.org/abs/1301.3781</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78LAlyreV"/></comment> </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pennington</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Socher</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Manning</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Glove: Global Vectors for Word Representation</article-title>
        <year>2014</year>  
        <conf-name>EMNLP</conf-name>
        <conf-date>2014</conf-date>
        <conf-loc>Doha, Qatar</conf-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <pub-id pub-id-type="doi">10.3115/v1/D14-1162</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bojanowski</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Grave</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Joulin</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Mikolov</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Enriching Word Vectors with Subword Information</article-title>
        <source>Transactions of the Association for Computational Linguistics</source>  
        <year>2017</year>  
        <volume>5</volume>  
        <fpage>135</fpage>  
        <lpage>146</lpage>  
        <pub-id pub-id-type="doi">10.1162/tacl_a_00051</pub-id></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Myers</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>McGuffee</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Choosing Scrapy</article-title>
        <source>J Comput Sci Coll</source>  
        <year>2015</year>  
        <volume>31</volume>  
        <issue>1</issue>  
        <fpage>83</fpage>  
        <lpage>9</lpage> </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Cui</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Yuan</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Fu</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Liang</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>He</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Shi</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Guan</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Teng</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Gao</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Han</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>The Drainage of Interstitial Fluid in the Deep Brain is Controlled by the Integrity of Myelination</article-title>
        <source>Aging and disease</source>  
        <year>2018</year>  
        <pub-id pub-id-type="doi">10.14336/AD.2018.1206</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
