<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v4i4e41</article-id>
    <article-id pub-id-type="pmid">27884812</article-id>
    <article-id pub-id-type="doi">10.2196/medinform.5748</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Consumers’ Use of UMLS Concepts on Social Media: Diabetes-Related Textual Data Analysis in Blog and Social Q&#38;A Sites</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Kruse</surname>
          <given-names>Clemens</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Wu</surname>
          <given-names>Han</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Kim</surname>
          <given-names>Sujin</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" equal-contrib="yes">
        <name name-style="western">
          <surname>Park</surname>
          <given-names>Min Sook</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-1870-3324</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib2" corresp="yes" equal-contrib="yes">
      <name name-style="western">
        <surname>He</surname>
        <given-names>Zhe</given-names>
      </name>
      <degrees>PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>School of Information</institution>
        <institution>Florida State University</institution>
        <addr-line>Louis Shores Building</addr-line>
        <addr-line>142 Collegiate Loop</addr-line>
        <addr-line>Tallahassee, FL, 32306</addr-line>
        <country>United States</country>
        <phone>1 850 644 5775</phone>
        <fax>1 850 644 9763</fax>
        <email>zhe.he@cci.fsu.edu</email>
      </address>  
      <xref rid="aff2" ref-type="aff">2</xref>
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-3608-0244</ext-link></contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Chen</surname>
          <given-names>Zhiwei</given-names>
        </name>
        <degrees>BEng</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-6918-5993</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Oh</surname>
          <given-names>Sanghee</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-5956-7296</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Bian</surname>
          <given-names>Jiang</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-2238-5429</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>School of Information</institution>
    <institution>Florida State University</institution>  
    <addr-line>Tallahassee, FL</addr-line>
    <country>United States</country></aff>
    <aff id="aff2">
    <sup>2</sup>
    <institution>Institute for Successful Longevity</institution>
    <institution>Florida State University</institution>  
    <addr-line>Tallahassee, FL</addr-line>
    <country>United States</country></aff>
    <aff id="aff3">
    <sup>3</sup>
    <institution>Department of Computer Science</institution>
    <institution>Florida State University</institution>  
    <addr-line>Tallahassee, FL</addr-line>
    <country>United States</country></aff>
    <aff id="aff4">
    <sup>4</sup>
    <institution>Department of Health Outcomes and Policy</institution>
    <institution>University of Florida</institution>  
    <addr-line>Gainesville, FL</addr-line>
    <country>United States</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Zhe He 
      <email>zhe.he@cci.fsu.edu</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><season>Oct-Dec</season><year>2016</year></pub-date>
    <pub-date pub-type="epub">
      <day>24</day>
      <month>11</month>
      <year>2016</year>
    </pub-date>
    <volume>4</volume>
    <issue>4</issue>
    <elocation-id>e41</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>10</day>
        <month>3</month>
        <year>2016</year>
      </date>
      <date date-type="rev-request">
        <day>28</day>
        <month>6</month>
        <year>2016</year>
      </date>
      <date date-type="rev-recd">
        <day>2</day>
        <month>8</month>
        <year>2016</year>
      </date>
      <date date-type="accepted">
        <day>22</day>
        <month>10</month>
        <year>2016</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Min Sook Park, Zhe He, Zhiwei Chen, Sanghee Oh, Jiang Bian. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 24.11.2016.</copyright-statement>
    <copyright-year>2016</copyright-year>
    <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/2.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="https://medinform.jmir.org/2016/4/e41/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="Background">
        <title>Background</title>
        <p>The widely known terminology gap between health professionals and health consumers hinders effective information seeking for consumers.</p>
      </sec>
      <sec sec-type="Objective">
        <title>Objective</title>
        <p>The aim of this study was to better understand consumers’ usage of medical concepts by evaluating the coverage of concepts and semantic types of the Unified Medical Language System (UMLS) on diabetes-related postings in 2 types of social media: blogs and social question and answer (Q&#38;A).</p>
      </sec>
      <sec sec-type="Methods">
        <title>Methods</title>
        <p>We collected 2 types of social media data: (1) a total of 3711 blogs tagged with “diabetes” on Tumblr posted between February and October 2015; and (2) a total of 58,422 questions and associated answers posted between 2009 and 2014 in the diabetes category of Yahoo! Answers. We analyzed the datasets using a widely adopted biomedical text processing framework Apache cTAKES and its extension YTEX. First, we applied the named entity recognition (NER) method implemented in YTEX to identify UMLS concepts in the datasets. We then analyzed the coverage and the popularity of concepts in the UMLS source vocabularies across the 2 datasets (ie, blogs and social Q&#38;A). Further, we conducted a concept-level comparative coverage analysis between SNOMED Clinical Terms (SNOMED CT) and Open-Access Collaborative Consumer Health Vocabulary (OAC CHV)—the top 2 UMLS source vocabularies that have the most coverage on our datasets. We also analyzed the UMLS semantic types that were frequently observed in our datasets.</p>
      </sec>
      <sec sec-type="Results">
        <title>Results</title>
        <p>We identified 2415 UMLS concepts from blog postings, 6452 UMLS concepts from social Q&#38;A questions, and 10,378 UMLS concepts from the answers. The medical concepts identified in the blogs can be covered by 56 source vocabularies in the UMLS, while those in questions and answers can be covered by 58 source vocabularies. SNOMED CT was the dominant vocabulary in terms of coverage across all the datasets, ranging from 84.9% to 95.9%. It was followed by OAC CHV (between 73.5% and 80.0%) and Metathesaurus Names (MTH) (between 55.7% and 73.5%). All of the social media datasets shared frequent semantic types such as “Amino Acid, Peptide, or Protein,” “Body Part, Organ, or Organ Component,” and “Disease or Syndrome.”</p>
      </sec>
      <sec sec-type="Conclusions">
        <title>Conclusions</title>
        <p>Although the 3 social media datasets vary greatly in size, they exhibited similar conceptual coverage among UMLS source vocabularies and the identified concepts showed similar semantic type distributions. As such, concepts that are both frequently used by consumers and also found in professional vocabularies such as SNOMED CT can be suggested to OAC CHV to improve its coverage.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>controlled vocabulary</kwd>
      <kwd>consumer health vocabulary</kwd>
      <kwd>concept coverage</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>There is a widely known language gap between health consumers and health care professionals [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. This gap may hinder effective communication between the 2 groups [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]; thus, impacting consumers’ health information seeking [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>] and subsequent decision making regarding their health issues [<xref ref-type="bibr" rid="ref10">10</xref>]. To assess the gap, Roberts and Demner-Fushman [<xref ref-type="bibr" rid="ref11">11</xref>] used a variety of natural language processing (NLP) techniques to analyze the difference between health questions asked by consumers and health professionals in different online question and answer (Q&#38;A) sites (eg, Yahoo! Answers, and WebMD). They found that consumer questions tend to contain more misspelled medical terms, have longer background information, and resemble open-domain language more closely than texts written by professionals. One major aspect of the gap is the difference in medical vocabulary used by consumers and health professionals. Zeng and colleagues [<xref ref-type="bibr" rid="ref12">12</xref>] observed that when searching online health information, using only consumer terms leads to poor information retrieval results. Plovnick and Zeng [<xref ref-type="bibr" rid="ref13">13</xref>] later reformulated consumers’ health queries with professional terminology and about 40% of reformulated queries yielded better search performance.</p>
        <p>To bridge the vocabulary gap between health professionals and consumers, early researchers have collected and analyzed diverse textual data generated by consumers to identify medical terms used by consumers. Brennan and Aronson [<xref ref-type="bibr" rid="ref14">14</xref>] used the MetaMap tool to extract salient concepts in nursing vocabularies from consumers’ email messages. Smith and colleagues [<xref ref-type="bibr" rid="ref15">15</xref>] also used MetaMap to successfully identify the Unified Medical Language System (UMLS) concepts used by consumers in their email messages submitted to the University of Pittsburgh Cancer Institute’s Cancer Information and Referral Service. These studies aimed to bridge the vocabulary gap between health professionals and consumers by identifying frequently-used consumer health terms that are relevant in developing consumer-oriented health information applications and linking free text to complex clinical knowledge resources. These <italic>ad hoc</italic> studies represent early efforts in bridging the vocabulary gap.</p>
        <p>A controlled vocabulary is “an organized arrangement of words and phrases used to index content and/or to retrieve content through browsing or searching” [<xref ref-type="bibr" rid="ref16">16</xref>]. In an effort to formalize consumer vocabulary for various applications, a controlled vocabulary called Open-Access Collaborative Consumer Health Vocabulary (“OAC CHV,” “CHV” for short) was recently developed as a collection of expressions and concepts that are commonly used by ordinary health information users [<xref ref-type="bibr" rid="ref17">17</xref>]. Moreover, CHV has been integrated in the largest medical terminological system—the UMLS, in which the United States National Library of Medicine (NLM) has mapped terms with the same meaning from different source vocabularies into the same concept. As such, consumer terms are connected to their corresponding professional terms in professional vocabularies such as SNOMED Clinical Terms (SNOMED CT). With CHV in the UMLS, one can translate a sentence with consumer terms to a sentence with professional terms in an automated fashion.</p>
        <p>Domain coverage—the extent to which a controlled vocabulary covers the intended domain—is one of the most desired properties for a controlled vocabulary [<xref ref-type="bibr" rid="ref18">18</xref>]. The usability and the overall structure of a controlled vocabulary heavily rely upon its coverage [<xref ref-type="bibr" rid="ref19">19</xref>]. Traditionally, controlled vocabulary development takes a top-down approach, which reflects a group of experts’ knowledge in the respective subject matter [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. For the development of CHV, however, a bottom-up approach was taken, emphasizing 2 fundamental properties: (1) CHV should capture actual consumers’ terms and expressions that reflect their health information needs, and (2) the expressions should be familiar to and used by consumers [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>To keep up with continuous evolution of medical knowledge, CHV needs to be updated and maintained by incorporating new, consumer-provided terms and expressions [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. Existing studies have shown promising results in discovering consumer terms for CHV from social media, in particular. Vydiswaran et al [<xref ref-type="bibr" rid="ref7">7</xref>] applied a pattern-based text mining approach to identify pairs of consumer and professional terms from Wikipedia. Hicks et al [<xref ref-type="bibr" rid="ref25">25</xref>] analyzed consumer messages exchanged in Twitter in order to evaluate terms related to gender identification on intake forms. Doing-Harris and Zeng-Treitler [<xref ref-type="bibr" rid="ref24">24</xref>] developed a computer assisted CHV update system, which can automatically identify prospective terms from social media. Identifying terms used by consumers in consumer-generated text in aggregate fashion can account for the variability of lay health language. These terms can be used to refine and enrich CHV [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>Consumers, however, may also learn and use professional terms [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. In this sense, medical terms that are familiar to consumers and are already established in other controlled vocabularies could be used to improve the coverage of CHV. Term reuse is a principle and best practice in ontology/terminology development as it promises to support the semantic interoperability and to reduce engineering costs [<xref ref-type="bibr" rid="ref27">27</xref>]. Researchers have previously developed semi-automated methods to facilitate systematic term reuse. He et al [<xref ref-type="bibr" rid="ref28">28</xref>] developed a topological-pattern-based method to identify terms from UMLS source vocabularies to enrich SNOMED CT [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>] and National Cancer Institute Thesaurus (NCIt) [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
        <p>However, this method cannot be directly applied to CHV, because it does not have hierarchical relationships (eg, parent-child relationship) that are necessary to construct topological patterns [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>]. Recently, Chandar et al [<xref ref-type="bibr" rid="ref31">31</xref>] introduced a similarity-based term recommendation method that represents n-grams extracted from the free-text eligibility criteria of clinical trials as a set of linguistic and contextual features. SNOMED CT terms are clustered with K-means clustering. The new terms are ordered by their distance to the nearest cluster centroid, representing their similarity to existing SNOMED CT terms. This method performed well on the corpus of free-text clinical study eligibility criteria, because they are mostly short and partial sentences written by health professionals with fruitful medical terms and little noise. It has yet to be tested on free-form consumer text that typically contains lengthy sentences and lay terms.</p>
        <p>Most previous studies concerning CHV development concentrated on the identification of new terms used by consumers [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. To the best of our knowledge, no prior studies have conducted in-depth assessment of the coverage and popularity of medical concepts in user-generated documents on social media. In this respect, there is a need to understand consumers’ use of terms in existing controlled vocabularies, and to perceive if there is the potential to improve CHV by incorporating health-related concepts used by consumers that are covered by professional vocabularies. In this study, therefore, we performed such an analysis in order to assess consumers’ use of medical concepts on social media postings pertaining to health concerns and to evaluate how many popular consumer terms have been included in the existing source vocabularies of the UMLS [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
        <p>In this study, we focus on diabetes, which is recognized as one of the most important public health problems with escalating health concerns by the World Health Organization (WHO) [<xref ref-type="bibr" rid="ref33">33</xref>]. Diabetes caused 1.5 million deaths in 2012 alone. It is known to cause disability and an array of serious health issues such as hypertension, nephropathy, and stroke [<xref ref-type="bibr" rid="ref34">34</xref>]. Global diabetes cases skyrocketed from 108 million in 1980 to 422 million in 2014. The number of diabetes cases will likely reach 700 million by 2025 [<xref ref-type="bibr" rid="ref35">35</xref>]. Diabetes and its complications not only impair population health but also impose substantial economic burdens on patients, their families, and society [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        <p>In this study, we collected diabetes-related consumer-generated blog postings from Tumblr and diabetes-related questions and answers from Yahoo! Answers. We carried out text mining to identify UMLS concepts from our datasets. Thus, we formulated the 2 research questions (RQs): (1) To what degree do the concepts from UMLS source vocabularies cover the concepts used by consumers describing their diabetes-related concerns on health postings of social media, especially blogs and social Q&#38;A? Which concepts do or do not overlap? (2) To what degree are the UMLS semantic types applicable to analyzing the concepts used by consumers when describing their diabetes-related concerns in social media, especially blogs and social Q&#38;A? Which semantic types are observed?</p>
        <p>In the first research question, we evaluated the coverage of all of the 178 English source vocabularies of the UMLS in our 2 datasets from Tumblr and Yahoo! Answers. In the second research question, we analyzed the semantic types of the UMLS concepts identified in our datasets.</p>
        <p>The current study mainly investigated the overlap between consumer concepts from social media and professional concepts in the UMLS. Indeed, consumers often proactively seek and share online health information on social media [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Their use of professional terms could be sophisticated covering both laypersons’ expressions and medical terminologies. In fact, not only consumers but also health care professionals have actively participated in creating health postings in social media [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Their use of terms in social media, however, is likely to be more consumer/patient-centric for health education and promotion to the public. The comparative analysis of the concept coverage between consumers and professional vocabularies in social media may be helpful in understanding the scale of the phenomenon. The comparison will also help yield insights into the nature of the vocabulary gap, which will contribute to the consistent development of the CHV. The current study, in particular, could shed light on how much social media users use existing terms in UMLS source vocabularies on the web. At the same time, findings from the current study could inform the feasibility of leveraging existing UMLS source vocabularies to enrich the CHV.</p>
      </sec>
      <sec>
        <title>The Unified Medical Language System</title>
        <p>The UMLS, maintained by the NLM of the National Institutes of Health, is the largest biomedical terminological system. Its 2-level structure consists of Metathesaurus and Semantic Network. The UMLS Metathesaurus is “a large, multi-purpose, and multi-lingual thesaurus that contains millions of biomedical and health related concepts, their synonymous names, and their relationships” [<xref ref-type="bibr" rid="ref40">40</xref>]. The UMLS Metathesaurus integrates more than 9.1 million terms from over 170 English source vocabularies into 3.1 million medical concepts (2015AA version). Besides English, the UMLS also contains source vocabularies in 20 other languages. The UMLS has integrated most of the well-designed and well-maintained medical terminologies such as SNOMED CT, the International Classification of Diseases 9<sup>th</sup> Revision, Clinical Modification (ICD-9-CM), NCIt, and RxNORM. SNOMED CT is the most comprehensive and precise clinical terminology in the world with over 310,000 active concepts [<xref ref-type="bibr" rid="ref41">41</xref>]. ICD-9-CM is used primarily to encode the diagnoses and procedures for billing purposes [<xref ref-type="bibr" rid="ref42">42</xref>]. RxNORM, on the other hand, normalizes names of all clinical drugs available on the US market and their links to many of the drug vocabularies commonly used in pharmacy management [<xref ref-type="bibr" rid="ref43">43</xref>]. Most significantly, the terms with the same meaning are mapped to the same concept in the UMLS. Due to its native term mapping, the UMLS is a valuable resource for supporting interoperability and translation in biomedicine [<xref ref-type="bibr" rid="ref32">32</xref>]. The NLM releases a new version of the UMLS twice a year.</p>
        <p>The UMLS semantic types represent “a set of broad subject categories that provide a consistent categorization of all concepts represented in the UMLS Metathesaurus” [<xref ref-type="bibr" rid="ref44">44</xref>]. Each concept in the UMLS is assigned 1 or more semantic types. In the 2015AA version of the UMLS, there are a total of 127 semantic types, describing concepts at the levels of entity and event. Entities include physical objects such as organisms, anatomical structures, and substances. Events describe activities, phenomena, and processes. For example, the semantic type “Disease or Syndrome” categorizes a set of concepts in the UMLS that indicate “a condition which alters or interferes with a normal process, state, or activity of an organism.”</p>
      </sec>
      <sec>
        <title>Consumer Health Vocabularies and Their Use in Consumer-Oriented Health Applications</title>
        <p>OAC CHV has been used in various health-related applications to improve patients’ access to health information. Zeng et al developed a translator specifically to convert texts in electronic health records to consumer-friendly text in patient health records by replacing UMLS terms with their corresponding OAC CHV terms [<xref ref-type="bibr" rid="ref45">45</xref>]. Many UMLS concepts have a one-to-one match with OAC CHV concepts. All the OAC CHV concepts have predefined consumer-friendly display names. Besides OAC CHV, other proprietary consumer health vocabularies have been developed. For example, Apelon has developed a CHV and has mapped their CHV terms to corresponding clinical concepts in SNOMED RT (an earlier version of SNOMED CT, developed by the College of American Pathologists), ICD-9-CM, and Physician’s Current Procedural Terminology (CPT) administrative codes. The CHV of Apelon has been used in various applications, such as consumer health data entry, patient results reporting, clinical note translation, and Web-based information retrieval [<xref ref-type="bibr" rid="ref46">46</xref>]. Mayo Clinic also developed their own consumer health vocabulary, which has a rich content of disease concepts as well as genetic and non-genetic risk factors to diseases [<xref ref-type="bibr" rid="ref8">8</xref>]. In this paper, we used OAC CHV because it is the only publicly available consumer health vocabulary that we have access to (through the UMLS).</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection</title>
        <p>Two types of social media were analyzed in the current study, namely blogs and social Q&#38;A, as they allow consumers to generate and freely exchange health information in text format. Health-related blogs are one of the most popular social media venues for health information distribution. Bloggers typically describe their personal experiences with diseases along with their encounters with health care professionals [<xref ref-type="bibr" rid="ref47">47</xref>]. Health care professionals also create blogs for sharing their medical knowledge and information with patients [<xref ref-type="bibr" rid="ref48">48</xref>]. Blogs have also been widely used for health promotion and education as a collaborative tool for both consumers and health care professionals [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref51">51</xref>]. On the other hand, social Q&#38;A is an online community-based Q&#38;A service where people gain knowledge through raising questions and receiving answers from others who willingly share their knowledge, experiences, and opinions regarding a wide range of topics including health. Social Q&#38;A is considered to be a knowledge-shaping sphere for laypeople [<xref ref-type="bibr" rid="ref52">52</xref>]. Consumers are motivated to use social Q&#38;A because their searches on web search engines with short queries that are not fully expressive often fail in retrieving useful information for their specific problems, while social Q&#38;A allows them to ask questions in natural language and in full sentences [<xref ref-type="bibr" rid="ref11">11</xref>]. For data collection, we used 2 datasets: (1) Tumblr, a popular blogging service; and (2) Yahoo! Answers, a social Q&#38;A service in North America.</p>
        <p>Tumblr and Yahoo! Answers were chosen for the current study due to their popularity and the convenience of using their Application Program Interfaces (APIs), which allowed us to collect data automatically from these sites. Also, both Tumblr and Yahoo! Answers do not limit the number of words in postings. As such, their users can elaborate their health concerns and information on postings with sufficient details, thereby providing us ample opportunities to extract and analyze relevant concepts from the postings.</p>
        <p>Tumblr is one of the fastest-growing blog sites with a nearly twenty-fold increase in the number of blogs from October 2012 to October 2015 [<xref ref-type="bibr" rid="ref53">53</xref>]. It launched relatively late in the market compared to other sites such as WordPress and Blogger, but is recognized as one of the best blog sites due to its ease of setup, stylish interface design, and micro-blogging support [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. It has over 227 million blogs and 37 million unique visitors as of February 2016 [<xref ref-type="bibr" rid="ref53">53</xref>]. From Tumblr, we collected a total of 3711 English text blogs with a tag related to “diabetes” (eg, “diabetes,” “diabetes mellitus,” and “Type 2 diabetes”) posted between February and October 2015.</p>
        <p>Yahoo! Answers is one of the most popular social Q&#38;A sites with approximately 5.6 million visitors per month as of February 2016 [<xref ref-type="bibr" rid="ref56">56</xref>]. From Yahoo! Answers, we garnered a total of 58,422 questions and associated answers between 2009 and 2014 in the diabetes category of Yahoo! Answers. During data analysis, we carried out text mining with questions and answers (specifically, best answers) separately, because the information in questions and answers could be different. Questions could capture health concerns and associated problems, while answers could mainly discuss information resources intended to solve the problems. It is important to note that 1 question may have more than one answer. In this study, we limited answers to the one selected as the best answer by the questioner. The data collected from Yahoo! Answers were separated into questions and answers in the subsequent analyses.</p>
      </sec>
      <sec>
        <title>Units of Analysis</title>
        <p>Once we collected text data from Tumblr and Yahoo! Answers, we mined the text data for “concepts,” a unit of understanding which represents a fundamental component of terminology [<xref ref-type="bibr" rid="ref57">57</xref>] or unit of meaning in an ontology [<xref ref-type="bibr" rid="ref31">31</xref>]. Concepts are different from “terms” in that a term refers to an entity or “physical object” written or spoken in text to represent a concept or thought [<xref ref-type="bibr" rid="ref58">58</xref>]. In the UMLS, a term is described as a “word or collection of words comprising an expression,” which indicates a class of all lexical variants (eg, “eye,” “Eye,” “eyes”) [<xref ref-type="bibr" rid="ref59">59</xref>]. The UMLS assigned each term an atom unique identifier (AUI) and grouped the terms with the same meaning into a concept with a concept unique identifier (CUI). We also analyzed the semantic types of the extracted concepts in order to understand the broad semantic categories of the terms that are frequently used by consumers.</p>
      </sec>
      <sec>
        <title>Textual Data Processing</title>
        <p>We used a widely adopted biomedical text processing framework Apache cTAKES™ [<xref ref-type="bibr" rid="ref60">60</xref>] and its extension YTEX [<xref ref-type="bibr" rid="ref61">61</xref>] to identify UMLS terms in our datasets. Apache cTAKES is designed as a natural language processing (NLP) system for extraction of information from the free-text data available in electronic medical records (EMRs). It provides an agile and flexible platform based on the Unstructured Information Management Architecture (UIMA) and a rich NLP library. YTEX, a module of cTAKES, provides Word Sense Disambiguation (WSD), data mining and feature engineering functionalities. We mainly used the WSD function of YTEX to recognize the most probable UMLS concept when a term in the free text can be matched to multiple ambiguous concepts. We used the 3.2.2 release of cTAKES and YTEX with the default workflow configuration named “Aggregate Plaintext UMLS Processor.”</p>
        <p><xref ref-type="fig" rid="figure1">Figure 1</xref> illustrates our overall analysis process. First, each document is a blog posting from Tumblr, a question or an answer from Yahoo! Answers. Each blog posting may consist of 1 or more sentences. Then, cTAKES detected and split each document into individual sentences using the sentence detector of OpenNLP [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>], with the default configuration for English text. For each sentence, cTAKES performed tokenization with the default tokenizer of OpenNLP and lexical variant generation using the lexical tool provided by the United States National Library of Medicine with the default configuration. Then, cTAKES performed Part-Of-Speech (POS) tagging using the POS tagger in OpenNLP with the information entropy-based model for English to generate the candidate terms for further processing. Then, YTEX matched the candidate terms with all the possible UMLS terms, which were preloaded from the MRCONSO table of the UMLS 2015AA release. We then stored the matching results to a MySQL database. For each candidate term, there may be 0, 1, or more matching UMLS terms with different semantics. To identify terms with reasonable semantics, we used YTEX to perform word sense disambiguation (WSD), in which the intrinsic information content (IC) measure is used as the semantic similarity metric with a window of 50 words as the context for WSD. The intrinsic information content is a measure of concept specificity computed from the structure of the taxonomy in a biomedical terminology and does not rely on the term frequency in the corpus. The details of the intrinsic IC measure can be found in Garla et al [<xref ref-type="bibr" rid="ref64">64</xref>]. Finally, all the UMLS terms in each record were extracted with a UMLS CUI.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Conceptual framework of the study. Dots refer to concepts extracted from the dataset and gray dots refer to concepts mapped to the concepts in one of the UMLS source vocabularies.</p>
          </caption>
          <graphic xlink:href="medinform_v4i4e41_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Concept Coverage Analysis</title>
        <p>We first analyzed the basic characteristics of the overall concept coverage across our datasets collected from Tumblr and Yahoo! Answers: (1) blog postings from Tumblr, (2) questions in Yahoo! Answers, and (3) answers in Yahoo! Answers. We then analyzed the coverage of each source vocabulary in the UMLS across the datasets. SNOMED CT and CHV are the 2 vocabularies with the highest concept coverage in our datasets. Thus, we conducted a concept coverage analysis of SNOMED CT and CHV based on our datasets. We also analyzed the semantic types of the concepts identified from our datasets.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Aggregate Characteristics of the Datasets</title>
        <p>We identified 2415 UMLS concepts from blog postings, 6452 UMLS concepts from questions, and 10,378 UMLS concepts from answers. <xref ref-type="table" rid="table1">Table 1</xref> shows the total number of documents and sentences in our datasets (ie, blog postings, questions, answers). These numbers were compared to the “# with UMLS concepts,” the unique number of documents and sentences containing the identified UMLS concepts. Note that we can only extract concepts that are present in the UMLS. Thus, the total number of concepts in our datasets (which can include concepts that are not in the UMLS) is not provided in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>There was a noticeable variation across the datasets. Over 80% of the documents from questions and answers contained 1 or more UMLS concepts whereas less than half of the documents from blogs did. Over half of the sentences from questions and answers contained at least 1 UMLS concept, while only 27% of those from blog posts contained at least 1 UMLS concept.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Basic characteristics of UMLS concept coverage in our datasets.</p>
          </caption>
          <table width="623" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="60"/>
            <col width="60"/>
            <col width="100"/>
            <col width="60"/>
            <col width="100"/>
            <col width="60"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td><break/></td>
                <td colspan="2">Tumblr</td>
                <td colspan="4">Yahoo! Answers</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td colspan="2">Blog postings</td>
                <td colspan="2">Questions</td>
                <td colspan="2">Answers</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Total #</td>
                <td># with UMLS concepts</td>
                <td>Total #</td>
                <td># with UMLS concepts</td>
                <td>Total #</td>
                <td># with UMLS concepts</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Document</td>
                <td>3711</td>
                <td>1388 (37.4%)</td>
                <td>58,422</td>
                <td>51,850 (88.8%)</td>
                <td>58,422</td>
                <td>51,550 (88.2%)</td>
              </tr>
              <tr valign="top">
                <td>Sentence</td>
                <td>47,413</td>
                <td>12,802 (27.0%)</td>
                <td>249,013</td>
                <td>142,802 (57.3%)</td>
                <td>348,793</td>
                <td>216,736 (62.1%)</td>
              </tr>
              <tr valign="top">
                <td>Concepts</td>
                <td>–</td>
                <td>2415</td>
                <td>–</td>
                <td>6452</td>
                <td>–</td>
                <td>10,378</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Coverage by the UMLS Source Vocabularies</title>
        <p>The concepts in the blogs were covered by 56 UMLS source vocabularies, while those in questions and answers were covered by 58 source vocabularies. <xref ref-type="table" rid="table2">Table 2</xref> illustrates the top 20 most covered UMLS source vocabularies (the full names and the version information of the source vocabularies can be found in the <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref> Table-A1). SNOMED CT was dominant across all our datasets, ranging from 84.9% to 95.9%. It was followed by CHV (between 73.5% and 80.0%) and MTH (between 55.7% and 73.5%). Other source vocabularies within the top 10 for all of our datasets are: NCIt, Medical Subject Headings (MeSH), Computer Retrieval of Information on Scientific Projects Thesaurus (CSP), Library of Congress Subject Headings Northwestern University subset (LCH_NW), Logical Observation Identifier Names and Codes (LOINC), and National Drug File – Reference Terminology (NDFRT), although the ranking order varies slightly across different datasets. <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref> Table-A2 provides example concepts in the top 3 most covered source vocabularies.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Top 20 most covered UMLS source vocabularies.</p>
          </caption>
          <table width="660" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="37"/>
            <col width="86"/>
            <col width="56"/>
            <col width="33"/>
            <col width="20"/>
            <col width="56"/>
            <col width="10"/>
            <col width="36"/>
            <col width="33"/>
            <col width="24"/>
            <col width="52"/>
            <col width="56"/>
            <col width="31"/>
            <thead>
              <tr valign="top">
                <td><break/></td>
                <td colspan="3">Tumblr</td>

                <td colspan="9">Yahoo! Answers</td>
              </tr>
              <tr valign="top">
                <td rowspan="2">Rank</td>
                <td colspan="3">Blogs (n=2415)</td>

                <td colspan="5">Questions (n=6452)</td>

                <td colspan="4">Answers (n=10,378)</td>
              </tr>
              <tr valign="top">
                <td>Source vocabulary</td>
                <td># of concepts</td>
                <td>%</td>
                <td colspan="2">Source vocabulary</td>
                <td colspan="2"># of concepts</td>
                <td>%</td>
                <td colspan="2">Source vocabulary</td>
                <td># of concepts</td>
                <td>%</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>SNOMED CT</td>
                <td>2315</td>
                <td>95.9</td>
                <td colspan="2">SNOMED CT</td>
                <td colspan="2">5476</td>
                <td>84.9</td>
                <td colspan="2">SNOMED CT</td>
                <td>9032</td>
                <td>87.0</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>CHV</td>
                <td>1931</td>
                <td>80.0</td>
                <td colspan="2">CHV</td>
                <td colspan="2">4928</td>
                <td>76.4</td>
                <td colspan="2">CHV</td>
                <td>7625</td>
                <td>73.5</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>MTH</td>
                <td>1774</td>
                <td>73.5</td>
                <td colspan="2">MTH</td>
                <td colspan="2">3899</td>
                <td>60.4</td>
                <td colspan="2">MTH</td>
                <td>5780</td>
                <td>55.7</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>NCIt</td>
                <td>1156</td>
                <td>47.9</td>
                <td colspan="2">MeSH</td>
                <td colspan="2">2957</td>
                <td>45.8</td>
                <td colspan="2">MeSH</td>
                <td>4796</td>
                <td>46.2</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>MeSH</td>
                <td>1130</td>
                <td>46.8</td>
                <td colspan="2">NCIt</td>
                <td colspan="2">2917</td>
                <td>45.2</td>
                <td colspan="2">NCIt</td>
                <td>4485</td>
                <td>43.2</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>CSP</td>
                <td>812</td>
                <td>33.6</td>
                <td colspan="2">CSP</td>
                <td colspan="2">1840</td>
                <td>28.5</td>
                <td colspan="2">NDFRT</td>
                <td>2999</td>
                <td>28.9</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>AOD</td>
                <td>775</td>
                <td>32.1</td>
                <td colspan="2">NDFRT</td>
                <td colspan="2">1775</td>
                <td>27.5</td>
                <td colspan="2">CSP</td>
                <td>2839</td>
                <td>27.4</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>LCH_NW</td>
                <td>771</td>
                <td>31.9</td>
                <td colspan="2">LCH_NW</td>
                <td colspan="2">1627</td>
                <td>25.2</td>
                <td colspan="2">LCH_NW</td>
                <td>2436</td>
                <td>23.5</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>LOINC</td>
                <td>697</td>
                <td>28.9</td>
                <td colspan="2">AOD</td>
                <td colspan="2">1585</td>
                <td>24.6</td>
                <td colspan="2">AOD</td>
                <td>2335</td>
                <td>22.5</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>NDFRT</td>
                <td>659</td>
                <td>27.3</td>
                <td colspan="2">LOINC</td>
                <td colspan="2">1510</td>
                <td>23.4</td>
                <td colspan="2">RXNORM</td>
                <td>2099</td>
                <td>20.2</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>LCH</td>
                <td>587</td>
                <td>24.3</td>
                <td colspan="2">RXNORM</td>
                <td colspan="2">1421</td>
                <td>22.0</td>
                <td colspan="2">LOINC</td>
                <td>2081</td>
                <td>20.1</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>NCI_NCI-GLOSS</td>
                <td>475</td>
                <td>19.7</td>
                <td colspan="2">LCH</td>
                <td colspan="2">1187</td>
                <td>18.4</td>
                <td colspan="2">LCH</td>
                <td>1730</td>
                <td>16.7</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>MEDLINEPLUS</td>
                <td>402</td>
                <td>16.6</td>
                <td colspan="2">NCI_NCI-GLOSS</td>
                <td colspan="2">952</td>
                <td>14.8</td>
                <td colspan="2">NCI_FDA</td>
                <td>1387</td>
                <td>13.4</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>CST</td>
                <td>365</td>
                <td>15.1</td>
                <td colspan="2">NCI_FDA</td>
                <td colspan="2">868</td>
                <td>13.5</td>
                <td colspan="2">DXP</td>
                <td>1322</td>
                <td>12.7</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>COSTAR</td>
                <td>362</td>
                <td>15.0</td>
                <td colspan="2">COSTAR</td>
                <td colspan="2">835</td>
                <td>12.9</td>
                <td colspan="2">NCI_NCI-GLOSS</td>
                <td>1321</td>
                <td>12.7</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>NCI_FDA</td>
                <td>345</td>
                <td>14.3</td>
                <td colspan="2">DXP</td>
                <td colspan="2">830</td>
                <td>12.9</td>
                <td colspan="2">COSTAR</td>
                <td>1257</td>
                <td>12.1</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>OMIM</td>
                <td>342</td>
                <td>14.2</td>
                <td colspan="2">CST</td>
                <td colspan="2">794</td>
                <td>12.3</td>
                <td colspan="2">OMIM</td>
                <td>1234</td>
                <td>11.9</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>RXNORM</td>
                <td>338</td>
                <td>14.0</td>
                <td colspan="2">OMIM</td>
                <td colspan="2">790</td>
                <td>12.2</td>
                <td colspan="2">CST</td>
                <td>1206</td>
                <td>11.6</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>DXP</td>
                <td>326</td>
                <td>13.5</td>
                <td colspan="2">MEDLINEPLUS</td>
                <td colspan="2">721</td>
                <td>11.2</td>
                <td colspan="2">VANDF</td>
                <td>1117</td>
                <td>10.8</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>ICD9CM</td>
                <td>241</td>
                <td>10.0</td>
                <td colspan="2">VANDF</td>
                <td colspan="2">644</td>
                <td>10.0</td>
                <td colspan="2">MTHSPL</td>
                <td>1033</td>
                <td>10.0</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Top 10 frequently observed concepts covered by both SNOMED CT and CHV.</p>
          </caption>
          <table width="643" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="32"/>
            <col width="134"/>
            <col width="38"/>
            <col width="140"/>
            <col width="44"/>
            <col width="140"/>
            <col width="43"/>
            <thead>
              <tr valign="top">
                <td rowspan="3">Rank</td>
                <td colspan="2">Tumblr</td>
                <td colspan="4">Yahoo! Answers</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td><break/></td>
                <td colspan="2">Questions</td>
                <td colspan="2">Answers</td>
              </tr>
              <tr valign="top">
                <td>Concept</td>
                <td>Freq.</td>
                <td>Concept</td>
                <td>Freq.</td>
                <td>Concept</td>
                <td>Freq.</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Blood (C0005767)</td>
                <td>816</td>
                <td>Blood (C0005767)</td>
                <td>30,654</td>
                <td>Blood (C0005767)</td>
                <td>54,689</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Pain (C0030193)</td>
                <td>798</td>
                <td>Sugars (C0242209)</td>
                <td>29,593</td>
                <td>Sugars (C0242209)</td>
                <td>49,207</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Insulin (C0021641)</td>
                <td>744</td>
                <td>Insulin (C0021641)</td>
                <td>10,816</td>
                <td>Insulin (C0021641)</td>
                <td>27,887</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Pharmaceutical preparations (C0013227)</td>
                <td>719</td>
                <td>Glucose (C0017725)</td>
                <td>7394</td>
                <td>Glucose (C0017725)</td>
                <td>26,420</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Sugars (C0242209)</td>
                <td>699</td>
                <td>Problem (C0033213)</td>
                <td>5111</td>
                <td>Pharmaceutical preparations (C0013227)</td>
                <td>11,571</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Disease (C0012634)</td>
                <td>617</td>
                <td>Water (C0043047)</td>
                <td>4781</td>
                <td>Disease (C0012634)</td>
                <td>9733</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Problem (C0033213)</td>
                <td>568</td>
                <td>Pharmaceutical preparations (C0013227)</td>
                <td>4456</td>
                <td>Carbohydrates (C0007004)</td>
                <td>9517</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Diabetes mellitus (C0011849)</td>
                <td>501</td>
                <td>Hematologic tests (C0018941)</td>
                <td>3784</td>
                <td>Problem (C0033213)</td>
                <td>9248</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Teeth structure (C0040426)</td>
                <td>424</td>
                <td>Pain (C0030193)</td>
                <td>3625</td>
                <td>Water (C0043047)</td>
                <td>5994</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Operative surgery procedures (C0543467)</td>
                <td>375</td>
                <td>Urine (C0042036)</td>
                <td>2550</td>
                <td>Fasting (C0015663)</td>
                <td>5848</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Top 10 frequently observed concepts covered by CHV but not SNOMED CT.</p>
          </caption>
          <table width="643" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="38"/>
            <col width="141"/>
            <col width="38"/>
            <col width="118"/>
            <col width="12"/>
            <col width="38"/>
            <col width="140"/>
            <col width="37"/>
            <thead>
              <tr valign="top">
                <td><break/></td>
                <td colspan="2">Tumblr</td>

                <td colspan="5">Yahoo! Answers</td>
              </tr>
              <tr valign="top">
                <td rowspan="2">Rank</td>
                <td><break/></td>
                <td><break/></td>
                <td colspan="3">Questions</td>
                <td colspan="2">Answers</td>
              </tr>
              <tr valign="top">
                <td>Concept (CUI)<sup>a</sup></td>
                <td>Freq.</td>
                <td colspan="2">Concept (CUI)</td>
                <td>Freq.</td>
                <td>Concept (CUI)</td>
                <td>Freq.</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Cider vinegar (C0937941)</td>
                <td>54</td>
                <td colspan="2">Stomach (C0038351)</td>
                <td>1050</td>
                <td>Lantus (C0876064)</td>
                <td>689</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Apple cider vinegar (C1178459)</td>
                <td>54</td>
                <td colspan="2">Lantus (C0876064)</td>
                <td>571</td>
                <td>Actos (C0875954)</td>
                <td>659</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Lantus (C0876064)</td>
                <td>15</td>
                <td colspan="2">Humalog (C0528249)</td>
                <td>260</td>
                <td>Avandia (C0875967)</td>
                <td>628</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Gentle (C0720654)</td>
                <td>11</td>
                <td colspan="2">NovoLog (C0939412)</td>
                <td>180</td>
                <td>HumaLog (C0528249)</td>
                <td>289</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Corrective (C0719519)</td>
                <td>9</td>
                <td colspan="2">Glucophage (C0591573)</td>
                <td>131</td>
                <td>NovoLog (C0939412)</td>
                <td>255</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Botox (C0700702)</td>
                <td>9</td>
                <td colspan="2">Levemir (C1314782)</td>
                <td>122</td>
                <td>Levemir (C1314782)</td>
                <td>184</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>RID (C0073361)</td>
                <td>6</td>
                <td colspan="2">Actos (C0875954)</td>
                <td>95</td>
                <td>Glucophage (C0591573)</td>
                <td>161</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>HumaLog (C0528249)</td>
                <td>5</td>
                <td colspan="2">Seroquel (C0287163)</td>
                <td>78</td>
                <td>Novolin (C0028467)</td>
                <td>112</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Bead Dosage Form (C0991566)</td>
                <td>3</td>
                <td colspan="2">Synthroid (C0728762)</td>
                <td>62</td>
                <td>Viagra (C0663448)</td>
                <td>105</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Actos (C0875954)</td>
                <td>3</td>
                <td colspan="2">Coumadin (C0699129)</td>
                <td>54</td>
                <td>Triphosphat (C0146894)</td>
                <td>77</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>CUI: concept unique identifier</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>There was significant overlap between the concepts from the top 2 source vocabularies, SNOMED CT and CHV: 78.2% (1889/2415) of the concepts from blog postings, 70.0% (4518/6452) of the concepts in questions, and 68.4% (7095/10,378) of the concepts in answers. <xref ref-type="table" rid="table3">Table 3</xref> shows the top 10 concepts. Note that we only show the preferred term of the concept in the UMLS throughout the paper. Diabetes-related concepts such as <italic>Blood</italic>, <italic>Sugars</italic>, <italic>Insulin</italic>, <italic>Glucose</italic>, and <italic>Diabetes mellitus</italic> were frequently mentioned (preferred names of a UMLS concept are denoted in italics). At the same time, it includes some general medical concepts such as <italic>disease</italic>, <italic>pharmaceutical preparations</italic>, and <italic>problem</italic>. Concepts related to glucose level in blood such as <italic>blood, sugars, glucose,</italic> and <italic>carbohydrates</italic> also appeared with high frequency.</p>
        <p>A few concepts were only covered by CHV: 1.7% (40/2415) concepts in blog postings, 6.3% (409/6452) concepts in questions, and 5.1% (529/10,378) concepts in answers. <xref ref-type="table" rid="table4">Table 4</xref> shows the top 10 most frequently observed UMLS concepts covered by CHV but not SNOMED CT in our datasets.</p>
        <p>All the concepts in <xref ref-type="table" rid="table4">Table 4</xref> are about pharmacological substances or organic chemicals, except <italic>stomach</italic> found within questions. Three concepts regarding medication therapy for diabetes, namely <italic>Lantus</italic> (ie, insulin glargine injection), <italic>Humalog</italic> (ie, insulin lispro injection), and <italic>Actos</italic> (ie, pioglitazone hydrochloride), appeared with high frequency in blog postings and questions/answers. Diabetes-treatment-related concepts, such as <italic>NovoLog</italic> and <italic>Glucophage</italic>, are more frequently observed in questions and answers than in blog postings. In total, 9 out of the top 10 concepts in questions and answers were diabetes medications. Only 2 concepts, namely <italic>stomach</italic> in questions and <italic>Viagra</italic> in answers, are not directly related to diabetes treatment. On the other hand, some concepts in blogs were indirectly related to diabetes. For example, <italic>cider vinegar</italic>, <italic>apple cider vinegar</italic>, and <italic>Botox</italic> also frequently appeared.</p>
        <p>There were also concepts covered by SNOMED CT but not CHV: 17.6% (424/2415) of the concepts from blog postings, 14.8% (957/6452) of the concepts in questions, and 18.7% (1936/10,378) of the concepts in answers (see <xref ref-type="table" rid="table5">Table 5</xref>). Human body related concepts, such as <italic>back structure excluding neck</italic>, <italic>entire heart</italic>, <italic>entire pancreas</italic>, <italic>entire kidney</italic>, <italic>entire skin</italic>, and <italic>entire eye</italic>, were frequently used to describe their diabetes issues in blog postings or questions/answers. Three concepts, <italic>entire skin</italic>, <italic>symptoms</italic> and <italic>fatty acid glycerol esters</italic> were observed from all our datasets. <italic>Massage</italic> and <italic>training</italic> were frequently mentioned in blog postings, while <italic>injection procedure</italic> and <italic>protective cup</italic> were frequently mentioned in questions and answers but were not mentioned as frequently as in blog postings. As these concepts were frequently observed in social media, CHV should consider importing them to enrich its conceptual content.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Top 10 frequently observed concepts covered by SNOMED CT but not CHV.</p>
          </caption>
          <table width="643" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="38"/>
            <col width="146"/>
            <col width="38"/>
            <col width="132"/>
            <col width="38"/>
            <col width="117"/>
            <col width="49"/>
            <thead>
              <tr valign="top">
                <td><break/></td>
                <td colspan="2">Tumblr</td>

                <td colspan="4">Yahoo! Answers</td>
              </tr>
              <tr valign="top">
                <td rowspan="2">Rank</td>
                <td><break/></td>
                <td><break/></td>
                <td colspan="2">Questions</td>
                <td colspan="2">Answers</td>
              </tr>
              <tr valign="top">
                <td>Concept (CUI)<sup>a</sup></td>
                <td>Freq.</td>
                <td>Concept (CUI)</td>
                <td>Freq.</td>
                <td>Concept (CUI)</td>
                <td>Freq.</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Entire skin (C1278993)</td>
                <td>524</td>
                <td>Symptoms (C1457887)</td>
                <td>7690</td>
                <td>Symptoms (C1457887)</td>
                <td>12,727</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Symptoms (C1457887)</td>
                <td>393</td>
                <td>Fatty acid glycerol esters (C0015677)</td>
                <td>1789</td>
                <td>Fatty acid glycerol esters (C0015677)</td>
                <td>8727</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Back structure, excluding neck (C1995000)</td>
                <td>236</td>
                <td>Entire foot (C1281587)</td>
                <td>1647</td>
                <td>Entire cells (C1269647)</td>
                <td>6435</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Massage (C0024875)</td>
                <td>217</td>
                <td>Back structure, excluding neck (C1995000)</td>
                <td>1589</td>
                <td>Entire heart (C1281570)</td>
                <td>3204</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Fatty acid glycerol esters (C0015677)</td>
                <td>210</td>
                <td>Entire kidney (C1278978)</td>
                <td>1368</td>
                <td>Entire pancreas (C1278931)</td>
                <td>3003</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Training (C0220931)</td>
                <td>163</td>
                <td>Entire eye (C1280202)</td>
                <td>1210</td>
                <td>Entire skin (C1278993)</td>
                <td>2614</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Entire pancreas (C1278931)</td>
                <td>157</td>
                <td>Protective cup (C1533124)</td>
                <td>1159</td>
                <td>Protective cup (C1533124)</td>
                <td>2178</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Entire heart (C1281570)</td>
                <td>156</td>
                <td>Entire lower limb (C1269079)</td>
                <td>985</td>
                <td>Entire stomach (C1278920)</td>
                <td>1876</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Entire oral cavity (C1278910)</td>
                <td>138</td>
                <td>Entire hands (C1281583)</td>
                <td>969</td>
                <td>Injection procedure (C1533685)</td>
                <td>1561</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Entire spine (C1280065)</td>
                <td>137</td>
                <td>Entire skin (C1278993)</td>
                <td>912</td>
                <td>Entire bony skeleton (C1266909)</td>
                <td>1501</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>CUI: concept unique identifier</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
<sec>
          <title>Semantic Types of the Identified Concepts</title>

        <p>Among 127 UMLS semantic types (STY), about half of them were identified in our datasets: 52 STYs (40.9%) in the blog postings, 59 STYs (46.5%) in the questions, and 54 STYs (42.5%) in the answers. In general, there was a significant overlap of STYs across our datasets with 52 shared STYs. Seven STYs, however, were identified in the questions only, including “Functional Concept,” “Intellectual Product,” “Laboratory Procedure,” “Organ or Tissue Function,” “Organism Attribute,” “Social Behavior,” and “Substance.” Two STYs, “Fully Formed Anatomical Structure” and “Cell or Molecular Dysfunction,” were not found in questions, but in both the answer dataset and the blog dataset. <xref ref-type="table" rid="table6">Table 6</xref> shows the top 10 most frequent semantic types of the identified UMLS concepts in the different datasets, respectively.</p>
        <p>When comparing the top 10 frequently observed STYs across the datasets, 9 out of 10 STYs (ie, “Finding,” “Pharmacologic Substance,” “Therapeutic or Preventive Procedure,” “Disease or Syndrome,” “Organic Chemical,” “Body Part, Organ, or Organ Component,” “Sign or Symptom,” “Medical Device,” and “Amino Acid, Peptide, or Protein”) commonly appeared across the datasets with minor differences in terms of frequency. “Laboratory Procedure” appeared frequently in questions but not in blogs and answers. “Pathologic Function” appeared frequently in answers but not in blogs and questions. Example concepts of the frequently observed STYs showed that laypeople tend to frequently use common concepts to describe their diabetes-related issues in social media. To illustrate, <italic>Sugars</italic>, <italic>Insulin</italic>, and <italic>Glucose</italic> ranked in the top 5 concepts of the STY “Pharmacologic Substance.” Similarly, the concepts such as <italic>Disease</italic> and <italic>Communicable Diseases</italic> appeared frequently among the concepts of the STY “Disease or Syndrome.” We provide the top 5 frequent concepts for the top 10 frequently observed semantic types in <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref> Table A3.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Top 10 frequently observed semantic types of the identified concepts.</p>
          </caption>
          <table width="637" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="40"/>
            <col width="80"/>
            <col width="38"/>
            <col width="39"/>
            <col width="80"/>
            <col width="28"/>
            <col width="44"/>
            <col width="80"/>
            <col width="43"/>
            <col width="49"/>
            <thead>
              <tr valign="top">
                <td rowspan="4">Rank</td>
                <td colspan="3">Tumblr</td>

                <td colspan="6">Yahoo! Answers</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Blogs</td>
                <td colspan="3">Questions</td>
                <td colspan="3">Answers</td>
              </tr>
              <tr valign="top">
                <td rowspan="2">Semantic type</td>
                <td colspan="2">Concept<sup>a</sup></td>
                <td rowspan="2">Semantic type</td>
                <td colspan="2">Concept</td>
                <td rowspan="2">Semantic type</td>
                <td colspan="2">Concept</td>
              </tr>
              <tr valign="top">
                <td>n (%)</td>
                <td>Freq.</td>
                <td>n (%)</td>
                <td>Freq.</td>
                <td>n (%)</td>
                <td>Freq.</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Finding</td>
                <td>380 <break/>(15.7)</td>
                <td>5277</td>
                <td>Pharmacologic substance</td>
                <td>1240 (19.2)</td>
                <td>53,976</td>
                <td>Pharmacologic substance</td>
                <td>1995 <break/>(19.2)</td>
                <td>185,880</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Pharmacologic substance</td>
                <td>307 (12.7)</td>
                <td>4413</td>
                <td>Organic chemical</td>
                <td>1006 <break/>(15.6)</td>
                <td>41,255</td>
                <td>Organic chemical</td>
                <td>1692 <break/>(16.3)</td>
                <td>123,509</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Therapeutic or preventive procedure</td>
                <td>241 <break/>(10.0)</td>
                <td>3184</td>
                <td>Finding</td>
                <td>895 <break/>(13.9)</td>
                <td>30,458</td>
                <td>Disease or syndrome</td>
                <td>1511 <break/>(14.6)</td>
                <td>57,379</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Disease or syndrome</td>
                <td>239 <break/>(9.9)</td>
                <td>2923</td>
                <td>Disease or syndrome</td>
                <td>743 (11.5)</td>
                <td>28,041</td>
                <td>Finding</td>
                <td>1302 <break/>(12.5)</td>
                <td>76,765</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Organic chemical</td>
                <td>225 <break/>(9.3)</td>
                <td>2737</td>
                <td>Body part, organ, or organ component</td>
                <td>484 <break/>(7.5)</td>
                <td>27,172</td>
                <td>Body part, organ, or organ component</td>
                <td>666 <break/>(6.4)</td>
                <td>48,584</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Body part, organ, or organ component</td>
                <td>208 <break/>(8.6)</td>
                <td>2566</td>
                <td>Sign or symptom</td>
                <td>338 <break/>(5.2)</td>
                <td>19,601</td>
                <td>Therapeutic or preventive procedure</td>
                <td>583 <break/>(5.6)</td>
                <td>16,555</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Sign or symptom</td>
                <td>145 (6.0)</td>
                <td>2214</td>
                <td>Therapeutic or preventive procedure</td>
                <td>331 <break/>(5.1)</td>
                <td>16,372</td>
                <td>Amino acid, peptide, or protein</td>
                <td>495 <break/>(4.8)</td>
                <td>40,521</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Medical device</td>
                <td>134 <break/>(5.5)</td>
                <td>1319</td>
                <td>Amino acid, peptide, or protein</td>
                <td>305 <break/>(4.7)</td>
                <td>13,178</td>
                <td>Sign or symptom</td>
                <td>436 <break/>(4.2)</td>
                <td>38,905</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Amino acid, peptide, or protein</td>
                <td>70 <break/>(2.9)</td>
                <td>1112</td>
                <td>Medical device</td>
                <td>201 <break/>(3.1)</td>
                <td>12,862</td>
                <td>Medical device</td>
                <td>347 <break/>(3.3)</td>
                <td>20,391</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Biologically active substance</td>
                <td>69 <break/>(2.9)</td>
                <td>1093</td>
                <td>Laboratory procedure</td>
                <td>180 <break/>(2.8)</td>
                <td>10,580</td>
                <td>Pathologic function</td>
                <td>292 <break/>(2.8)</td>
                <td>12,551</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>The percentage was calculated based on the total number of unique identified UMLS concepts: blogs in Tumblr: n=2415, questions in Yahoo! Answers: n=6452, answers in Yahoo! Answers: n=10,378</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec></sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Previous studies [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref15">15</xref>] utilized user-generated documents including social media. However, they mainly used a single test bed based on the assumption that the selected test bed would properly reflect people’s medical concepts. Our study involved different types of social media, which contain texts that laypeople generated for different purposes: questions are for expressing their health information seeking needs; blogs and answers are more likely for sharing their knowledge, experiences, and opinions with others. The current study investigated the terminology coverage in consumer-generated text in social media by identifying UMLS concepts and their semantic types. Our findings demonstrated that consumers use medical concepts not only from controlled vocabularies developed for consumers (ie, CHV) but also those for health professionals (eg, SNOMED CT). Our results are in line with prior observations that consumers use both lay and professional terms [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref65">65</xref>] and demonstrated that CHV can be enriched by other source vocabularies in the UMLS.</p>
        <p>The UMLS concept usage in blogs and social Q&#38;A was different in that the UMLS concepts appeared more frequently in the postings of social Q&#38;A (almost 90% of questions and answers) in comparison to blog postings (about 30%). Social Q&#38;A users mainly discuss health-related issues (in the current study, diabetes-related issues) in their postings, because their participation in question asking and answering is purpose-driven. On the other hand, blog users often elaborate on nonhealth-related topics in their postings, although they tagged their postings with “diabetes.”</p>
        <p>In spite of the differences of the overall UMLS concept coverage between blogs and social Q&#38;A, we found that the UMLS concepts identified in different datasets can be covered by a similar number of UMLS source vocabularies. Two UMLS source vocabularies, ie, SNOMED CT and CHV, showed the best coverage. Social media users in our datasets may have advanced medical knowledge because they often use professional terms. CHV demonstrated the second largest coverage for all the datasets despite the fact that CHV has a much smaller number of concepts and terms than SNOMED CT (1:6 ratio). CHV was developed to incorporate consumer expressions presented in consumer-generated text data. Our findings showed that different social media platforms may play a similar role as consumer-generated documents for CHV enrichment, which confirmed the literature [<xref ref-type="bibr" rid="ref66">66</xref>].</p>
        <p>A comparison of the concept coverage between SNOMED CT and CHV in our datasets led us to examine the difference between the concept usages among blog and social Q&#38;A users. For example, <italic>cider vinegar</italic>, <italic>apple cider vinegar</italic>, <italic>massages</italic>, and <italic>training</italic> were frequently mentioned in blog postings, while they were not frequently mentioned in questions and answers. However, concepts pertaining to insulin therapy, such as <italic>Lantus</italic>, <italic>Humalog</italic>, and <italic>Actos</italic>, are frequently used in questions/answers. Consumers often inquire about a variety of insulin therapies in social Q&#38;A, while blogs often include recipes specific to the use of <italic>vinegar</italic>, a popular ingredient in diabetes-controlling food. <italic>Botox</italic> and <italic>Viagra</italic> were often mentioned in blog postings and answers. They could be important for diabetic patients, although they may not be closely related to controlling diabetes. It would be interesting to further analyze the relationship of these terms to diabetes. An in-depth analysis of the identified concepts along with how they are used in the original postings could produce useful information for understanding consumers’ information needs and use.</p>
        <p>According to our analysis, the percentage of unique concepts covered by CHV but not by SNOMED CT varied from 1.7% to 6.3%. In the blog dataset, where approximately 3000 blogs were analyzed, only 40 concepts were covered by CHV exclusively. On the other hand, in Yahoo! Answers, 409 concepts (6.3%) in questions and 529 concepts (5.1%) in answers were covered by CHV but not by SNOMED CT. These results indicate that the larger datasets would yield more lay concepts. The size of dataset also appeared to affect the diversity of semantics. The same set of 9 semantic types was observed frequently in all our datasets. “Finding,” “Pharmacologic Substance,” and “Disease or Syndrome” were among the top 4 most observed semantic types.</p>
        <p>Differences were observed as well. Blogs might be better platforms for consumers to discuss organic chemicals, pharmacologic substances, or therapeutic or preventive procedures for diabetes. Yet, concepts of organic chemicals and pharmacologic substances were also frequently used in social Q&#38;A. In social Q&#38;A data, 7 semantic types that were not identified in blogs were observed, indicating that larger datasets may yield more diverse medical concepts.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has a few limitations. First, the blog data in Tumblr and Yahoo! Answers data were collected in different time frames and are different in size, which might have affected the findings of this study. Smaller volumes of blog data used in this study may affect the diversity of the UMLS concepts identified. Even though blogging and question posting/answering are dynamic online activities for those living with chronic diseases, Tumblr and Yahoo! Answers may not represent all the health information users’ concept usage. The datasets could be expanded to include other types of social media such as diabetes-related discussion boards. The users of these online sources may be biased towards those with greater technological proficiency, such as those who are younger, more educated or those in a higher socioeconomic status who are more likely to seek health information on the Internet. This study may not reflect the experiences of those who are older adults, less educated or underprivileged [<xref ref-type="bibr" rid="ref67">67</xref>]. Second, even though the automated NLP techniques that were employed in the current study were cost-effective, direct interaction with ordinary health information users would allow the researchers to capture more accurate meaning of medical concepts that these individuals commonly use to describe their health issues. Moreover, a qualitative approach such as content analysis also would help to identify contextual semantics of the concepts. Third, although the WSD function of YTEX is effective in selecting the most possible UMLS concepts for a term in free text, the same term may be matched to different ambiguous UMLS concepts. This is mainly due to the fact that the UMLS may contain unmapped synonymous concepts. Ideally, manual review by domain experts could be applied to further refine the automatic mapping results.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The current study examined the potential of social media as user-generated documents in which consumers’ medical concepts can be observed and leveraged for controlled vocabulary development for ordinary health information users. We selected and tested 2 social media venues, namely blogs and social Q&#38;A. Our findings showed stronger similarities rather than differences in the controlled vocabulary usage. The size of a dataset may affect the number of concepts identified. However, the similarities in the source vocabularies, frequently used concepts, and semantic types of the concepts indicate that social media sites tend to reflect the common sense of laypeople. More importantly, we found that social media users not only employ consumer concepts in CHV but also concepts in professional vocabularies such as SNOMED CT. This indicates that CHV still has room for improvements by incorporating concepts from other UMLS source vocabularies. The focus of our study is not to identify a list of consumer medical concepts, but to test the feasibility of leveraging social media data to identify consumer concepts covered by existing UMLS source vocabularies. Ultimately, it would assist consumers’ health information searches online, narrowing the disparity between ordinary health information users and medical professionals. In future studies, we will employ automated approaches to identify and recommend new medical terms and concepts from social media to enrich CHV.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>Table A1. Full names of the UMLS source vocabularies in <xref ref-type="table" rid="table2">Table 2</xref>. Table A2. Top 10 frequently observed concepts in the top 3 most covered source vocabularies. Table A3. Top 5 most frequent concepts in the top 9 frequent semantic types.</p>
        <media xlink:href="medinform_v4i4e41_app1.pdf" xlink:title="PDF File (Adobe PDF File), 125KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">APIs</term>
          <def>
            <p>application programming interfaces</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUI</term>
          <def>
            <p>atom unique identifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CSP</term>
          <def>
            <p>Computer Retrieval of Information on Scientific Projects Thesaurus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CUI</term>
          <def>
            <p>concept unique identifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">IC</term>
          <def>
            <p>information content</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LCH NW</term>
          <def>
            <p>Library of Congress Subject Headings, Northwestern University subset</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LOINC</term>
          <def>
            <p>Logical Observation Identifier Names and Codes</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MeSH</term>
          <def>
            <p>Medical Subject Headings</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NCIt</term>
          <def>
            <p>National Cancer Institute Thesaurus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NDFRT</term>
          <def>
            <p>National Drug File – Reference Terminology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NLM</term>
          <def>
            <p>National Library of Medicine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">OAC CHV</term>
          <def>
            <p>Open-Access Collaborative Consumer Health Vocabulary</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">POS</term>
          <def>
            <p>Part-Of-Speech</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">Q&#38;A</term>
          <def>
            <p>questions and answers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">SNOMED CT</term>
          <def>
            <p>SNOMED Clinical Terms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">STY</term>
          <def>
            <p>semantic type</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">UIMA</term>
          <def>
            <p>Unstructured Information Management Architecture</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">WSD</term>
          <def>
            <p>word sense disambiguation</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to thank Dr Warren Allen for providing the computing resources for this work. This work was partially supported by an Amazon Web Services Education and Research Grant Award (PI: He). The work was also partially supported by the National Center for Advancing Translational Sciences under the Clinical and Translational Science Award UL1TR001427 (PI: Nelson &#38; Shenkman). The content is solely the responsibility of the authors and does not represent the official view of the National Institutes of Health.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>MP initiated the idea of this study. ZH led the conceptualization, design, and implementation of this study. MP collected and provided the blog data from Tumblr.com. SO collected and provided the social Q&#38;A data from Yahoo! Answers. ZC performed the natural language processing on the datasets and structured the results in a relational database. MP performed the data analysis and drafted the initial version; ZH, SO, and BJ extensively revised the draft critically and iteratively for important intellectual content. All authors contributed to the methodology development and results interpretation, edited the paper significantly, and gave final approval for the version to be published. ZH takes primary responsibility for the research reported here.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Messai</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Simonet</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Bricon-Souf</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Mousseau</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Characterizing consumer health terminology in the breast cancer field</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2010</year>  
        <volume>160</volume>  
        <issue>Pt 2</issue>  
        <fpage>991</fpage>  
        <lpage>4</lpage>  
        <pub-id pub-id-type="medline">20841832</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Poikonen</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Vakkari</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Lay persons’ and professionals’ nutrition-related vocabularies and their matching to a general and a specific thesaurus</article-title>
        <source>Journal of Information Science</source>  
        <year>2009</year>  
        <volume>35</volume>  
        <issue>2</issue>  
        <fpage>232</fpage>  
        <lpage>43</lpage> </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Wicks</surname>
            <given-names>PJ</given-names>
          </name>
        </person-group>
        <article-title>PatientsLikeMe: Consumer health vocabulary as a folksonomy</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2008</year>  
        <fpage>682</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2656083&#38;tool=pmcentrez&#38;rendertype=abstract"/>
        </comment>  
        <pub-id pub-id-type="medline">18999004</pub-id>
        <pub-id pub-id-type="pmcid">PMC2656083</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Patrick</surname>
            <given-names>TB</given-names>
          </name>
          <name name-style="western">
            <surname>Monga</surname>
            <given-names>HK</given-names>
          </name>
          <name name-style="western">
            <surname>Sievert</surname>
            <given-names>ME</given-names>
          </name>
          <name name-style="western">
            <surname>Houston</surname>
            <given-names>HJ</given-names>
          </name>
          <name name-style="western">
            <surname>Longo</surname>
            <given-names>DR</given-names>
          </name>
        </person-group>
        <article-title>Evaluation of controlled vocabulary resources for development of a consumer entry vocabulary for diabetes</article-title>
        <source>J Med Internet Res</source>  
        <year>2001</year>  
        <volume>3</volume>  
        <issue>3</issue>  
        <fpage>E24</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2001/3/e24/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.3.3.e24</pub-id>
        <pub-id pub-id-type="medline">11720966</pub-id>
        <pub-id pub-id-type="pmcid">PMC1761907</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zielstorff</surname>
            <given-names>RD</given-names>
          </name>
        </person-group>
        <article-title>Controlled vocabularies for consumer health</article-title>
        <source>J Biomed Inform</source>  
        <year>2003</year>  
        <volume>36</volume>  
        <issue>4-5</issue>  
        <fpage>326</fpage>  
        <lpage>33</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532046403000960"/>
        </comment>  
        <pub-id pub-id-type="medline">14643728</pub-id>
        <pub-id pub-id-type="pii">S1532046403000960</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Soergel</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Exploring medical expressions used by consumers and the media: an emerging view of consumer health vocabularies</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2003</year>  
        <fpage>674</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14728258"/>
        </comment>  
        <pub-id pub-id-type="medline">14728258</pub-id>
        <pub-id pub-id-type="pii">D030002918</pub-id>
        <pub-id pub-id-type="pmcid">PMC1479921</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Vydiswaran</surname>
            <given-names>VG</given-names>
          </name>
          <name name-style="western">
            <surname>Mei</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Hanauer</surname>
            <given-names>DA</given-names>
          </name>
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Mining consumer health vocabulary from community-generated text</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2014</year>  
        <volume>2014</volume>  
        <fpage>1150</fpage>  
        <lpage>9</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25954426"/>
        </comment>  
        <pub-id pub-id-type="medline">25954426</pub-id>
        <pub-id pub-id-type="pmcid">PMC4419967</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Seedorff</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Peterson</surname>
            <given-names>KJ</given-names>
          </name>
          <name name-style="western">
            <surname>Nelsen</surname>
            <given-names>LA</given-names>
          </name>
          <name name-style="western">
            <surname>Cocos</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>McCormick</surname>
            <given-names>JB</given-names>
          </name>
          <name name-style="western">
            <surname>Chute</surname>
            <given-names>CG</given-names>
          </name>
          <name name-style="western">
            <surname>Pathak</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Incorporating expert terminology and disease risk factors into consumer health vocabularies</article-title>
        <source>Pac Symp Biocomput</source>  
        <year>2013</year>  
        <fpage>421</fpage>  
        <lpage>32</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://psb.stanford.edu/psb-online/proceedings/psb13/abstracts/2013_p421.html"/>
        </comment>  
        <pub-id pub-id-type="medline">23424146</pub-id>
        <pub-id pub-id-type="pii">9789814447973_0041</pub-id>
        <pub-id pub-id-type="pmcid">PMC3587774</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gross</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Taylor</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>What have we got to lose? The effect of controlled vocabulary on keyword searching results</article-title>
        <source>College &#38; Research Libraries</source>  
        <year>2005</year>  
        <volume>66</volume>  
        <issue>3</issue>  
        <fpage>212</fpage>  
        <lpage>30</lpage> </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lewis</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Eysenbach</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Jimison</surname>
            <given-names>HB</given-names>
          </name>
          <name name-style="western">
            <surname>Kukafka</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Stavri</surname>
            <given-names>PZ</given-names>
          </name>
        </person-group>
        <article-title>Consumer health informatics</article-title>
        <source>Consumer Health Informatics: Informing Consumers and Improving Health Care</source>  
        <year>2005</year>  
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>Springer</publisher-name>
        <fpage>1</fpage>  
        <lpage>7</lpage> </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Roberts</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Demner-Fushman</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Interactive use of online health resources: a comparison of consumer and professional questions</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2016</year>  
        <month>07</month>  
        <volume>23</volume>  
        <issue>4</issue>  
        <fpage>802</fpage>  
        <lpage>11</lpage>  
        <pub-id pub-id-type="doi">10.1093/jamia/ocw024</pub-id>
        <pub-id pub-id-type="medline">27147494</pub-id>
        <pub-id pub-id-type="pii">ocw024</pub-id>
        <pub-id pub-id-type="pmcid">PMC4926747</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Kogan</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ash</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Greenes</surname>
            <given-names>RA</given-names>
          </name>
        </person-group>
        <article-title>Patient and clinician vocabulary: how different are they?</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2001</year>  
        <volume>84</volume>  
        <issue>Pt 1</issue>  
        <fpage>399</fpage>  
        <lpage>403</lpage>  
        <pub-id pub-id-type="medline">11604772</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Plovnick</surname>
            <given-names>RM</given-names>
          </name>
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>QT</given-names>
          </name>
        </person-group>
        <article-title>Reformulation of consumer health queries with professional terminology: a pilot study</article-title>
        <source>J Med Internet Res</source>  
        <year>2004</year>  
        <month>09</month>  
        <day>03</day>  
        <volume>6</volume>  
        <issue>3</issue>  
        <fpage>e27</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2004/3/e27/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.6.3.e27</pub-id>
        <pub-id pub-id-type="medline">15471753</pub-id>
        <pub-id pub-id-type="pii">v6e27</pub-id>
        <pub-id pub-id-type="pmcid">PMC1550613</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Brennan</surname>
            <given-names>PF</given-names>
          </name>
          <name name-style="western">
            <surname>Aronson</surname>
            <given-names>AR</given-names>
          </name>
        </person-group>
        <article-title>Towards linking patients and clinical information: detecting UMLS concepts in e-mail</article-title>
        <source>J Biomed Inform</source>  
        <year>2003</year>  
        <volume>36</volume>  
        <issue>4-5</issue>  
        <fpage>334</fpage>  
        <lpage>41</lpage>  
        <pub-id pub-id-type="medline">14643729</pub-id>
        <pub-id pub-id-type="pii">S1532046403000984</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Stavri</surname>
            <given-names>PZ</given-names>
          </name>
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>WW</given-names>
          </name>
        </person-group>
        <article-title>In their own words? A terminological analysis of e-mail to a cancer information service</article-title>
        <source>Proc AMIA Symp</source>  
        <year>2002</year>  
        <fpage>697</fpage>  
        <lpage>701</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/12463914"/>
        </comment>  
        <pub-id pub-id-type="medline">12463914</pub-id>
        <pub-id pub-id-type="pii">D020002157</pub-id>
        <pub-id pub-id-type="pmcid">PMC2244265</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Harpring</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Baca</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>What Are Controlled Vocabularies?</article-title>
        <source>Introduction to Controlled Vocabularies: Terminology for Art, Architecture, and Other Cultural Works</source>  
        <year>2010</year>  
        <publisher-loc>Los Angeles, CA</publisher-loc>
        <publisher-name>Getty Publications</publisher-name></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>QT</given-names>
          </name>
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Exploring and developing consumer health vocabularies</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2006</year>  
        <volume>13</volume>  
        <issue>1</issue>  
        <fpage>24</fpage>  
        <lpage>9</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&#38;pmid=16221948"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M1761</pub-id>
        <pub-id pub-id-type="medline">16221948</pub-id>
        <pub-id pub-id-type="pii">M1761</pub-id>
        <pub-id pub-id-type="pmcid">PMC1380193</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cimino</surname>
            <given-names>JJ</given-names>
          </name>
        </person-group>
        <article-title>Desiderata for controlled medical vocabularies in the twenty-first century</article-title>
        <source>Methods Inf Med</source>  
        <year>1998</year>  
        <month>11</month>  
        <volume>37</volume>  
        <issue>4-5</issue>  
        <fpage>394</fpage>  
        <lpage>403</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/9865037"/>
        </comment>  
        <pub-id pub-id-type="medline">9865037</pub-id>
        <pub-id pub-id-type="pii">98040394</pub-id>
        <pub-id pub-id-type="pmcid">PMC3415631</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Arts</surname>
            <given-names>DG</given-names>
          </name>
          <name name-style="western">
            <surname>Cornet</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>de Jonge</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>de Keizer</surname>
            <given-names>NF</given-names>
          </name>
        </person-group>
        <article-title>Methods for evaluation of medical terminological systems--a literature review and a case study</article-title>
        <source>Methods Inf Med</source>  
        <year>2005</year>  
        <volume>44</volume>  
        <issue>5</issue>  
        <fpage>616</fpage>  
        <lpage>25</lpage>  
        <pub-id pub-id-type="medline">16400369</pub-id>
        <pub-id pub-id-type="pii">05050616</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Greenberg</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Drake</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Metadata and the World Wide Web</article-title>
        <source>Encyclopedia of Library and Information Science</source>  
        <year>2003</year>  
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>Marcel Dekker, Inc</publisher-name>
        <fpage>1876</fpage>  
        <lpage>1888</lpage> </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mathes</surname>
            <given-names>A</given-names>
          </name>
        </person-group>


        <access-date>2016-11-02</access-date>
        <comment>Folksonomies - cooperative classification and communication through shared metadata. 2004 Dec.
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.adammathes.com/academic/computer-mediated-communication/folksonomies.html">http://www.adammathes.com/academic/computer-mediated-communication/folksonomies.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6lilnyrVJ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>An exploratory study of user-centered indexing of published biomedical images</article-title>
        <source>J Med Libr Assoc</source>  
        <year>2013</year>  
        <month>01</month>  
        <volume>101</volume>  
        <issue>1</issue>  
        <fpage>73</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23405049"/>
        </comment>  
        <pub-id pub-id-type="doi">10.3163/1536-5050.101.1.011</pub-id>
        <pub-id pub-id-type="medline">23405049</pub-id>
        <pub-id pub-id-type="pii">JMLA-D-12-00011</pub-id>
        <pub-id pub-id-type="pmcid">PMC3543137</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>MacLean</surname>
            <given-names>DL</given-names>
          </name>
          <name name-style="western">
            <surname>Heer</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Identifying medical terms in patient-authored text: a crowdsourcing-based approach</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2013</year>  
        <volume>20</volume>  
        <issue>6</issue>  
        <fpage>1120</fpage>  
        <lpage>7</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=23645553"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2012-001110</pub-id>
        <pub-id pub-id-type="medline">23645553</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2012-001110</pub-id>
        <pub-id pub-id-type="pmcid">PMC3822103</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Doing-Harris</surname>
            <given-names>KM</given-names>
          </name>
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>Computer-assisted update of a consumer health vocabulary through mining of social network data</article-title>
        <source>J Med Internet Res</source>  
        <year>2011</year>  
        <month>05</month>  
        <day>17</day>  
        <volume>13</volume>  
        <issue>2</issue>  
        <fpage>e37</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2011/2/e37/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.1636</pub-id>
        <pub-id pub-id-type="medline">21586386</pub-id>
        <pub-id pub-id-type="pii">v13i2e37</pub-id>
        <pub-id pub-id-type="pmcid">PMC3221384</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hicks</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hogan</surname>
            <given-names>WR</given-names>
          </name>
          <name name-style="western">
            <surname>Rutherford</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Malin</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Xie</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Fellbaum</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Yin</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Fabbri</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Hanna</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Bian</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Mining Twitter as a First Step toward Assessing the Adequacy of Gender Identification Terms on Intake Forms</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2015</year>  
        <volume>2015</volume>  
        <fpage>611</fpage>  
        <lpage>20</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26958196"/>
        </comment>  
        <pub-id pub-id-type="medline">26958196</pub-id>
        <pub-id pub-id-type="pmcid">PMC4765681</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lewis</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Brennan</surname>
            <given-names>PF</given-names>
          </name>
          <name name-style="western">
            <surname>McCray</surname>
            <given-names>AT</given-names>
          </name>
          <name name-style="western">
            <surname>Tuttle</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Bachman</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>If We Build It, They Will Come: Standardized Consumer Vocabularies</article-title>
        <source>Studies in Health Technology and Informatics</source>  
        <year>2001</year>  
        <volume>84</volume>  
        <fpage>1530</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://ebooks.iospress.nl/publication/19863"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kamdar</surname>
            <given-names>MR</given-names>
          </name>
          <name name-style="western">
            <surname>Tudorache</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Musen</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>A Systematic Analysis of Term Reuse and Term Overlap across Biomedical Ontologies</article-title>
        <source>Semantic Web – Interoperability, Usability, Applicability</source>  
        <year>2016</year>  
        <comment>(forthcoming). 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.semantic-web-journal.net/content/systematic-analysis-term-reuse-and-term-overlap-across-biomedical-ontologies-0"/></comment> </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>He</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Geller</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>A comparative analysis of the density of the SNOMED CT conceptual content for semantic harmonization</article-title>
        <source>Artif Intell Med</source>  
        <year>2015</year>  
        <month>05</month>  
        <volume>64</volume>  
        <issue>1</issue>  
        <fpage>29</fpage>  
        <lpage>40</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25890688"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.artmed.2015.03.002</pub-id>
        <pub-id pub-id-type="medline">25890688</pub-id>
        <pub-id pub-id-type="pii">S0933-3657(15)00025-1</pub-id>
        <pub-id pub-id-type="pmcid">PMC4457611</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>He</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Geller</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Elhanan</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Categorizing the Relationships between Structurally Congruent Concepts from Pairs of Terminologies for Semantic Harmonization</article-title>
        <source>AMIA Jt Summits Transl Sci Proc</source>  
        <year>2014</year>  
        <volume>2014</volume>  
        <fpage>48</fpage>  
        <lpage>53</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25717400"/>
        </comment>  
        <pub-id pub-id-type="medline">25717400</pub-id>
        <pub-id pub-id-type="pmcid">PMC4333698</pub-id></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>He</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>de Coronado</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Piskorski</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Geller</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Topological-Pattern-based Recommendation of UMLS Concepts for National Cancer Institute Thesaurus</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2016</year>  
        <comment>(forthcoming)</comment> </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chandar</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Yaman</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hoxha</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>He</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Weng</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Similarity-Based Recommendation of New Concepts to a Terminology</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2015</year>  
        <volume>2015</volume>  
        <fpage>386</fpage>  
        <lpage>95</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26958170"/>
        </comment>  
        <pub-id pub-id-type="medline">26958170</pub-id>
        <pub-id pub-id-type="pmcid">PMC4765685</pub-id></nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bodenreider</surname>
            <given-names>O</given-names>
          </name>
        </person-group>
        <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
        <source>Nucleic Acids Res</source>  
        <year>2004</year>  
        <month>01</month>  
        <day>1</day>  
        <volume>32</volume>  
        <issue>Database issue</issue>  
        <fpage>D267</fpage>  
        <lpage>70</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://nar.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=14681409"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
        <pub-id pub-id-type="medline">14681409</pub-id>
        <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
        <pub-id pub-id-type="pmcid">PMC308795</pub-id></nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
        <source>World Health Organization</source>  
        <access-date>2016-11-02</access-date>

        <comment>Global Report on Diabetes. World Health Organization.
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://apps.who.int/iris/bitstream/10665/204871/1/9789241565257_eng.pdf?ua=1">http://apps.who.int/iris/bitstream/10665/204871/1/9789241565257_eng.pdf?ua=1</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6limIzVBr"/></comment> </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
        <source>American Diabetes Association</source>  
        <access-date>2016-11-02</access-date>
        <comment>Diabetes complications. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.diabetes.org/living-with-diabetes/complications/">http://www.diabetes.org/living-with-diabetes/complications/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6limMPM2T"/></comment> </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Krug</surname>
            <given-names>EG</given-names>
          </name>
        </person-group>
        <article-title>Trends in diabetes: sounding the alarm</article-title>
        <source>Lancet</source>  
        <year>2016</year>  
        <month>04</month>  
        <day>9</day>  
        <volume>387</volume>  
        <issue>10027</issue>  
        <fpage>1485</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S0140-6736(16)30163-5"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/S0140-6736(16)30163-5</pub-id>
        <pub-id pub-id-type="medline">27061675</pub-id>
        <pub-id pub-id-type="pii">S0140-6736(16)30163-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fox</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
 
        <access-date>2016-11-02</access-date>
        <comment>The social life of health information. Pew Research Center.
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.pewresearch.org/fact-tank/2014/01/15/the-social-life-of-health-information/">http://www.pewresearch.org/fact-tank/2014/01/15/the-social-life-of-health-information/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6limSyxAR"/></comment> </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Andersen</surname>
            <given-names>NB</given-names>
          </name>
          <name name-style="western">
            <surname>Söderqvist</surname>
            <given-names>T</given-names>
          </name>
        </person-group>

        <access-date>2016-11-02</access-date>
        <comment>Social media and public health research. Copenhagen, Denmark: University of Copenhagen. 2012 Aug 20.
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.museion.ku.dk/wp-content/uploads/FINAL-Social-Media-and-Public-Health-Research.pdf">http://www.museion.ku.dk/wp-content/uploads/FINAL-Social-Media-and-Public-Health-Research.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6limaWFsM"/></comment> </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Oh</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>The characteristics and motivations of health answerers for sharing information, knowledge, and experiences in online environments</article-title>
        <source>J Am Soc Inf Sci Technol</source>  
        <year>2011</year>  
        <month>11</month>  
        <day>01</day>  
        <volume>63</volume>  
        <issue>3</issue>  
        <fpage>543</fpage>  
        <lpage>557</lpage>  
        <pub-id pub-id-type="doi">10.1002/asi.21676</pub-id></nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Giustini</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>How Web 2.0 is changing medicine</article-title>
        <source>BMJ</source>  
        <year>2006</year>  
        <month>12</month>  
        <day>23</day>  
        <volume>333</volume>  
        <issue>7582</issue>  
        <fpage>1283</fpage>  
        <lpage>4</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17185707"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/bmj.39062.555405.80</pub-id>
        <pub-id pub-id-type="medline">17185707</pub-id>
        <pub-id pub-id-type="pii">333/7582/1283</pub-id>
        <pub-id pub-id-type="pmcid">PMC1761169</pub-id></nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
        <source>U.S. National Library of Medicine</source>  
        <access-date>2016-11-02</access-date>
        <comment>Fact Sheet of the UMLS Metathesaurus. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/pubs/factsheets/umlsmeta.html">https://www.nlm.nih.gov/pubs/factsheets/umlsmeta.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6limjdjWl"/></comment> </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
        <source>U.S. National Library of Medicine</source>  
        <access-date>2016-11-02</access-date>
        <comment>SNOMED Clinical Terms. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/healthit/snomedct/index.html">https://www.nlm.nih.gov/healthit/snomedct/index.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6limqaGNJ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Finnegan</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>ICD-9-CM coding for physician billing</article-title>
        <source>J Am Med Rec Assoc</source>  
        <year>1989</year>  
        <month>02</month>  
        <volume>60</volume>  
        <issue>2</issue>  
        <fpage>22</fpage>  
        <lpage>3</lpage>  
        <pub-id pub-id-type="medline">10303229</pub-id></nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bennett</surname>
            <given-names>CC</given-names>
          </name>
        </person-group>
        <article-title>Utilizing RxNorm to support practical computing applications: capturing medication history in live electronic health records</article-title>
        <source>J Biomed Inform</source>  
        <year>2012</year>  
        <month>08</month>  
        <volume>45</volume>  
        <issue>4</issue>  
        <fpage>634</fpage>  
        <lpage>41</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(12)00041-X"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2012.02.011</pub-id>
        <pub-id pub-id-type="medline">22426081</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(12)00041-X</pub-id></nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
        <source>U.S. National Library of Medicine</source>  
        <access-date>2016-07-13</access-date>
        <comment>Fact Sheet of the UMLS Semantic Network. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/pubs/factsheets/umlssemn.html">https://www.nlm.nih.gov/pubs/factsheets/umlssemn.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6iyTFIvxE"/></comment> </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Goryachev</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Keselman</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Rosendale</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Making texts in electronic health records comprehensible to consumers: a prototype translator</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2007</year>  
        <fpage>846</fpage>  
        <lpage>50</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18693956"/>
        </comment>  
        <pub-id pub-id-type="medline">18693956</pub-id>
        <pub-id pub-id-type="pmcid">PMC2655860</pub-id></nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zielstorff</surname>
            <given-names>RD</given-names>
          </name>
        </person-group>
        <article-title>Controlled vocabularies for consumer health</article-title>
        <source>J Biomed Inform</source>  
        <year>2003</year>  
        <month>08</month>  
        <volume>36</volume>  
        <issue>4-5</issue>  
        <fpage>326</fpage>  
        <lpage>333</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2003.09.015</pub-id></nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Miller</surname>
            <given-names>EA</given-names>
          </name>
          <name name-style="western">
            <surname>Pole</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Diagnosis blog: checking up on health blogs in the blogosphere</article-title>
        <source>Am J Public Health</source>  
        <year>2010</year>  
        <month>08</month>  
        <volume>100</volume>  
        <issue>8</issue>  
        <fpage>1514</fpage>  
        <lpage>9</lpage>  
        <pub-id pub-id-type="doi">10.2105/AJPH.2009.175125</pub-id>
        <pub-id pub-id-type="medline">20558802</pub-id>
        <pub-id pub-id-type="pii">AJPH.2009.175125</pub-id>
        <pub-id pub-id-type="pmcid">PMC2901304</pub-id></nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lagu</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Kaufman</surname>
            <given-names>EJ</given-names>
          </name>
          <name name-style="western">
            <surname>Asch</surname>
            <given-names>DA</given-names>
          </name>
          <name name-style="western">
            <surname>Armstrong</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Content of weblogs written by health professionals</article-title>
        <source>J Gen Intern Med</source>  
        <year>2008</year>  
        <month>10</month>  
        <volume>23</volume>  
        <issue>10</issue>  
        <fpage>1642</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18649110"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1007/s11606-008-0726-6</pub-id>
        <pub-id pub-id-type="medline">18649110</pub-id>
        <pub-id pub-id-type="pmcid">PMC2533366</pub-id></nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kamel Boulos</surname>
            <given-names>MN</given-names>
          </name>
          <name name-style="western">
            <surname>Maramba</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Wheeler</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Wikis, blogs and podcasts: a new generation of Web-based tools for virtual collaborative clinical practice and education</article-title>
        <source>BMC Med Educ</source>  
        <year>2006</year>  
        <volume>6</volume>  
        <fpage>41</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.biomedcentral.com/1472-6920/6/41"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6920-6-41</pub-id>
        <pub-id pub-id-type="medline">16911779</pub-id>
        <pub-id pub-id-type="pii">1472-6920-6-41</pub-id>
        <pub-id pub-id-type="pmcid">PMC1564136</pub-id></nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Oomen-Early</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Burke</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Entering the Blogosphere: Blogs as Teaching and Learning Tools in Health Education</article-title>
        <source>International Electronic Journal of Health Education</source>  
        <year>2007</year>  
        <volume>10</volume>  
        <fpage>186</fpage>  
        <lpage>196</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://eric.ed.gov/?id=EJ794207"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cobus</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Using blogs and wikis in a graduate public health course</article-title>
        <source>Med Ref Serv Q</source>  
        <year>2009</year>  
        <volume>28</volume>  
        <issue>1</issue>  
        <fpage>22</fpage>  
        <lpage>32</lpage>  
        <pub-id pub-id-type="doi">10.1080/02763860802615922</pub-id>
        <pub-id pub-id-type="medline">19197741</pub-id>
        <pub-id pub-id-type="pii">908112720</pub-id></nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Oh</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Oh</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Research agenda for social Q&#38;A</article-title>
        <source>Library &#38; Information Science Research</source>  
        <year>2009</year>  
        <volume>31</volume>  
        <issue>4</issue>  
        <fpage>205</fpage>  
        <lpage>9</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.lisr.2009.07.006</pub-id></nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="web">
        <source>Statista</source>  
        <access-date>2016-02-06</access-date>
        <comment>Statistics and facts about Tumblr. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.statista.com/topics/2463/tumblr/">http://www.statista.com/topics/2463/tumblr/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6iyTGqcPl"/></comment> </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
        <source>DearBlogger: The Blogging Answers Community</source>  
        <access-date>2016-11-02</access-date>
        <comment>The Best Places to Start a Blog. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.dearblogger.org/blogger-or-wordpress-better">http://www.dearblogger.org/blogger-or-wordpress-better</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6linVHtiQ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fitzpatrick</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>Lifehacker</source>  
        <year>2010</year>  
        <month>06</month>  
        <day>20</day>  
        <access-date>2016-11-02</access-date>
        <comment>Five Best Blogging Platforms. Lifehacker. 2010 Jun 20.
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://lifehacker.com/5568092/five-best-blogging-platforms">http://lifehacker.com/5568092/five-best-blogging-platforms</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6linqjSUQ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="web">
        <source>Quantcast</source>  
        <access-date>2016-07-13</access-date>
        <comment>Statistics of Yahoo! Answers. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.quantcast.com/answers.yahoo.com">https://www.quantcast.com/answers.yahoo.com</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6iyTJNzkf"/></comment> </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Temmerman</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Towards new ways of terminology description: The sociocognitive-approach</article-title>
        <source>Towards new ways of terminology description: the sociocognitive-approach</source>  
        <year>2000</year>  
        <publisher-loc>Amsterdam</publisher-loc>
        <publisher-name>J. Benjamins</publisher-name></nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Crystal</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <source>Dictionary of Linguistics and Phonetics (The Language Library)</source>  
        <year>2008</year>  
        <month>06</month>  
        <publisher-loc>Hoboken, NJ</publisher-loc>
        <publisher-name>Wiley-Blackwell</publisher-name></nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
        <source>U.S. National Library of Medicine</source>  
        <access-date>2016-01-08</access-date>
        <comment>UMLS Glossary. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/research/umls/new_users/glossary.html">https://www.nlm.nih.gov/research/umls/new_users/glossary.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6iyTK76nW"/></comment> </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="web">
        <source>Apache Software Foundation</source>  

        <access-date>2016-01-18</access-date>
        <comment>cTAKES (clinical Text Analysis and Knowledge Extraction System). 2016 Jan 18.
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://ctakes.apache.org">http://ctakes.apache.org</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6iyTLd7aW"/></comment> </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Garla</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Lo</surname>
            <given-names>RV</given-names>
          </name>
          <name name-style="western">
            <surname>Dorey-Stein</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Kidwai</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Scotch</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Womack</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Justice</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Brandt</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>The Yale cTAKES extensions for document classification: architecture and application</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2011</year>  
        <volume>18</volume>  
        <issue>5</issue>  
        <fpage>614</fpage>  
        <lpage>20</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=21622934"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000093</pub-id>
        <pub-id pub-id-type="medline">21622934</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2011-000093</pub-id>
        <pub-id pub-id-type="pmcid">PMC3168305</pub-id></nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Baldridge</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>Apache Software Foundation</source>  
        <access-date>2016-11-02</access-date>
        <comment>The openNLP project. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://opennlp.apache.org/index">http://opennlp.apache.org/index</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6lioR15Gv"/></comment> </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="web">
        <source>Apache Software Foundation</source>  
        <access-date>2016-11-02</access-date>
        <comment>The OpenNLP Documentation. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://opennlp.apache.org/documentation.html">https://opennlp.apache.org/documentation.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6lioZxO8m"/></comment> </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Garla</surname>
            <given-names>VN</given-names>
          </name>
          <name name-style="western">
            <surname>Brandt</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Semantic similarity in the biomedical domain: an evaluation across knowledge sources</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2012</year>  
        <month>10</month>  
        <day>10</day>  
        <volume>13</volume>  
        <fpage>261</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-13-261"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2105-13-261</pub-id>
        <pub-id pub-id-type="medline">23046094</pub-id>
        <pub-id pub-id-type="pii">1471-2105-13-261</pub-id>
        <pub-id pub-id-type="pmcid">PMC3533586</pub-id></nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>He</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Park</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Z</given-names>
          </name>
        </person-group>
        <article-title>UMLS-Based Analysis of Medical Terminology Coverage for Tags in Diabetes-Related Blogs</article-title>
        <source>iConference 2016 Proceedings</source>  
        <year>2016</year>  
        <conf-name>iConference 2016</conf-name>
        <conf-date>March 20-23, 2016</conf-date>
        <conf-loc>Philadelphia, PA</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.ideals.illinois.edu/handle/2142/89441"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sarasohn-Kahn</surname>
            <given-names>J</given-names>
          </name>
        </person-group>

        <access-date>2016-11-02</access-date>
        <comment>The Wisdom of Patients: Health Care Meets Online Social Media. California Health Care Foundation. 2008 Apr.
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.chcf.org/publications/2008/04/the-wisdom-of-patients-health-care-meets-online-social-media">http://www.chcf.org/publications/2008/04/the-wisdom-of-patients-health-care-meets-online-social-media</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6liosAzP6"/></comment> </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Beyond quality and accessibility: Source selection in consumer health information searching</article-title>
        <source>J Assoc Inf Sci Technol</source>  
        <year>2014</year>  
        <month>01</month>  
        <day>07</day>  
        <volume>65</volume>  
        <issue>5</issue>  
        <fpage>911</fpage>  
        <lpage>927</lpage>  
        <pub-id pub-id-type="doi">10.1002/asi.23023</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
