<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v4i4e40</article-id>
    <article-id pub-id-type="pmid">27903489</article-id>
    <article-id pub-id-type="doi">10.2196/medinform.6373</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Finding Important Terms for Patients in Their Electronic Health Records: A Learning-to-Rank Approach Using Expert Annotations</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Pivovarov</surname>
          <given-names>Rimma</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Zhang</surname>
          <given-names>Shaodian</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes">
      <name name-style="western">
        <surname>Chen</surname>
        <given-names>Jinying</given-names>
      </name>
      <degrees>PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>Department of Quantitative Health Sciences</institution>
        <institution>University of Massachusetts Medical School</institution>
        <addr-line>368 Plantation Street</addr-line>
        <addr-line>Worcester, MA</addr-line>
        <country>United States</country>
        <phone>1 774 455 3527</phone>
        <fax>1 508 856 8993</fax>
        <email>jinying.chen@umassmed.edu</email>
      </address>  
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7259-4301</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Zheng</surname>
          <given-names>Jiaping</given-names>
        </name>
        <degrees>MS</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7662-810X</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Yu</surname>
          <given-names>Hong</given-names>
        </name>
        <degrees>PhD, FACMI</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9263-5035</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>Department of Quantitative Health Sciences</institution>
    <institution>University of Massachusetts Medical School</institution>  
    <addr-line>Worcester, MA</addr-line>
    <country>United States</country></aff>
    <aff id="aff2">
    <sup>2</sup>
    <institution>School of Computer Science</institution>
    <institution>University of Massachusetts</institution>  
    <addr-line>Amherst, MA</addr-line>
    <country>United States</country></aff>
    <aff id="aff3">
    <sup>3</sup>
    <institution>Bedford Veterans Affairs Medical Center</institution>
    <institution>Center for Healthcare Organization and Implementation Research</institution>  
    <addr-line>Bedford, MA</addr-line>
    <country>United States</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Jinying Chen 
      <email>jinying.chen@umassmed.edu</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><season>Oct-Dec</season><year>2016</year></pub-date>
    <pub-date pub-type="epub">
      <day>30</day>
      <month>11</month>
      <year>2016</year>
    </pub-date>
    <volume>4</volume>
    <issue>4</issue>
    <elocation-id>e40</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>19</day>
        <month>7</month>
        <year>2016</year>
      </date>
      <date date-type="rev-request">
        <day>18</day>
        <month>8</month>
        <year>2016</year>
      </date>
      <date date-type="rev-recd">
        <day>24</day>
        <month>9</month>
        <year>2016</year>
      </date>
      <date date-type="accepted">
        <day>22</day>
        <month>10</month>
        <year>2016</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Jinying Chen, Jiaping Zheng, Hong Yu. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 30.11.2016.</copyright-statement>
    <copyright-year>2016</copyright-year>
    <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/2.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://medinform.jmir.org/2016/4/e40/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="Background">
        <title>Background</title>
        <p>Many health organizations allow patients to access their own electronic health record (EHR) notes through online patient portals as a way to enhance patient-centered care. However, EHR notes are typically long and contain abundant medical jargon that can be difficult for patients to understand. In addition, many medical terms in patients’ notes are not directly related to their health care needs. One way to help patients better comprehend their own notes is to reduce information overload and help them focus on medical terms that matter most to them. Interventions can then be developed by giving them targeted education to improve their EHR comprehension and the quality of care.</p>
      </sec>
      <sec sec-type="Objective">
        <title>Objective</title>
        <p>We aimed to develop a supervised natural language processing (NLP) system called Finding impOrtant medical Concepts most Useful to patientS (FOCUS) that automatically identifies and ranks medical terms in EHR notes based on their importance to the patients.</p>
      </sec>
      <sec sec-type="Methods">
        <title>Methods</title>
        <p>First, we built an expert-annotated corpus. For each EHR note, 2 physicians independently identified medical terms important to the patient. Using the physicians’ agreement as the gold standard, we developed and evaluated FOCUS. FOCUS first identifies candidate terms from each EHR note using MetaMap and then ranks the terms using a support vector machine-based learn-to-rank algorithm. We explored rich learning features, including distributed word representation, Unified Medical Language System semantic type, topic features, and features derived from consumer health vocabulary. We compared FOCUS with 2 strong baseline NLP systems.</p>
      </sec>
      <sec sec-type="Results">
        <title>Results</title>
        <p>Physicians annotated 90 EHR notes and identified a mean of 9 (SD 5) important terms per note. The Cohen’s kappa annotation agreement was .51. The 10-fold cross-validation results show that FOCUS achieved an area under the receiver operating characteristic curve (AUC-ROC) of 0.940 for ranking candidate terms from EHR notes to identify important terms. When including term identification, the performance of FOCUS for identifying important terms from EHR notes was 0.866 AUC-ROC. Both performance scores significantly exceeded the corresponding baseline system scores (<italic>P</italic>&#60;.001). Rich learning features contributed to FOCUS’s performance substantially.</p>
      </sec>
      <sec sec-type="Conclusions">
        <title>Conclusions</title>
        <p>FOCUS can automatically rank terms from EHR notes based on their importance to patients. It may help develop future interventions that improve quality of care.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>electronic health records</kwd>
      <kwd>natural language processing</kwd>
      <kwd>information extraction</kwd>
      <kwd>supervised learning</kwd>
      <kwd>learning to rank</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background and Significance</title>
        <p>Greater patient involvement is indispensable in delivering high-quality patient-centered care. In one effort to achieve this goal, spurred by the Health Information Technology for Economic and Clinical Health Act [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>] and the Centers for Medicare and Medicaid Services Medicare Electronic Health Record (EHR) incentive program [<xref ref-type="bibr" rid="ref3">3</xref>], online patient portals have been widely adopted by health systems in the United States [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. In addition to giving patients structured information from EHRs (eg, laboratory test results and medication lists), the OpenNotes initiative [<xref ref-type="bibr" rid="ref5">5</xref>] and the Blue Button movement [<xref ref-type="bibr" rid="ref6">6</xref>] allow patients to access their full EHR notes through patient portals. Early evidence shows improved medical comprehension, health care management, and outcomes from the OpenNotes initiative [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>However, the benefits from accessing their full EHR notes would be compromised if patients cannot comprehend their notes. EHRs were created for physician-physician communication, and thus are frequently long and contain abundant medical jargon. Patients who usually do not have the same medical training as physicians are likely overwhelmed by the medical jargon, and therefore face an enormous challenge in comprehending their notes. For example, EHRs were written at an 8<sup>th</sup>-12<sup>th</sup>-grade reading level [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref13">13</xref>], which is above the average adult patient’s reading level of 7<sup>th</sup>-8<sup>th</sup> grade in the United States [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. In addition, 36% of adult Americans have limited health literacy [<xref ref-type="bibr" rid="ref19">19</xref>] and have shown difficulty in comprehending medical jargon [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. In fact, limited health literacy has been identified as one of the major barriers to patient online portal use, which includes the interpretation of information from EHRs [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>]. Therefore, information technologies that support EHR comprehension are much needed to supplement the widespread use of patient portals and EHRs among patients.</p>
        <p>To support patient EHR comprehension, this work focuses on identifying medical terms that matter most to individual patients in their EHR notes—we used the 2 phrases “medical terms” and “medical jargon” interchangeably in this paper. Our work was motivated by 2 reasons. First, medical terms, which are fundamental to discourse-level EHR comprehension, have been shown to be obstacles for patients [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Second, EHR notes incorporate a comprehensive description of patients’ medical courses yet patients may care about their immediate concerns. For example, a radiology report may describe technical details of tumor images; however, the patient may want to know only the tumor size, the diagnosis, and the prognosis. When helping patients comprehend their own EHR notes, the approach of explaining all the jargon in their notes may likely overwhelm them and may be unnecessary in the first place.</p>
        <p>Therefore, in this study we identify medical jargon most important to individual patients. Personalized interventions can then be developed by giving targeted educational materials to each individual patient.</p>
        <p>In order to find out whether medical terms can be prioritized, we asked physicians to identify terms important to patients in EHRs. <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> shows an excerpt from a typical EHR note from our corpus. Although there are many medical terms in this piece of text—here we only highlighted a subset of terms identified by MetaMap [<xref ref-type="bibr" rid="ref29">29</xref>] for illustration purposes—physicians identified only 5 terms most important for patients to know: <italic>thrombocytosis</italic>, <italic>Crohn disease</italic>, <italic>budesonide</italic>, <italic>diabetes mellitus</italic>, and <italic>metformin</italic>. Note that physicians do not mark many unfamiliar medical terms (eg, <italic>complete blood count</italic> [<italic>CBC</italic>], <italic>hematemesis</italic>, and <italic>epistaxis</italic>), suggesting that they do not rank terms based on their difficulty levels.</p>
        <boxed-text id="box1" position="float">
          <title>A sample electronic health record text where physicians identified important medical terms (bracketed with angle brackets). Other medical terms are italicized.</title>
          <p>xxx is a xx-year-old man referred for evaluation of &#60;thrombocytosis&#62;. Prior <italic>CBCs</italic> from xxx through xxx revealed <italic>platelet counts</italic> ranging from 400,000 to 500,000, but no more recent studies are available. He has long-standing &#60;Crohn disease&#62; and although he says he has not had <italic>gastrointestinal bleeding</italic> in the past, he has been given iron, which he is taking twice daily. He has black stool, but notes no blood and he has not had <italic>hematemesis</italic>. He notes no blood in his urine or sputum and he has no <italic>epistaxis</italic>. He discontinued the use of iron yesterday because he thought that might alleviate his gastrointestinal complaints, but he does not feel different today. He is cared for by Dr. xxx at xxx Hospital Medical Center in xxx. He has no history of prior cancers, <italic>tuberculosis</italic> or other infectious diseases. He has been taking &#60;budesonide&#62; for his &#60;Crohn disease&#62;. He has no unexplained fevers, although he states he often feels hot. He has no soaking sweats and has not had unexplained weight loss. He believes he was referred to an <italic>oncologist</italic> many years ago at xxx, but he cannot recall the reason for that referral, who the doctor was, or what the findings were. He often feels queasy and nauseated, but has no vomiting. He has loose stools up to 4 days per week, but has had a stable pattern of &#60;Crohn disease&#62;. Also notable for &#60;diabetes mellitus&#62; for which he takes &#60;metformin&#62; and has required no <italic>insulin</italic> and has had no complications of <italic>retinopathy</italic> or <italic>renal dysfunction</italic>. &#60;Crohn disease&#62; as described above and an enlarged prostate.</p>
        </boxed-text>
        <p>Our aim was to develop a supervised natural language processing (NLP) system called Finding impOrtant medical Concepts most Useful to patientS (FOCUS) to automatically rank those EHR (patient)-specific important terms as high. This task was challenging, as the problem could not be solved by using only simple strategies such as term unfamiliarity, term frequency, and handcrafted rules (details in the Discussion section). We therefore built FOCUS with supervised learning and rich features.</p>
        <p>To the best of our knowledge, our work is the first to successfully rank medical terms in EHR notes by focusing on patients’ needs. This is an important step toward information reduction and personalized interventions to improve patient EHR comprehension. Our contributions are multifold. First, we defined a new NLP task of prioritizing or ranking medical terms that are important for patients. Second, we developed a state-of-the-art learning-based NLP system to automate the task. Third, we explored novel semantically motivated learning features.</p>
        <p>By using a robust learning framework, FOCUS can be readily adapted to other NLP tasks including summarization and question answering.</p>
      </sec>
      <sec>
        <title>Related Works</title>
        <sec>
          <title>Natural Language Processing Systems Facilitating Concept-Level Electronic Health Record Comprehension</title>
          <p>There has been active research on linking medical terms to lay terms [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], consumer-oriented definitions [<xref ref-type="bibr" rid="ref12">12</xref>] and educational materials [<xref ref-type="bibr" rid="ref32">32</xref>], and showing improved comprehension with such interventions [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>].</p>
          <p>On the issue of determining which medical terms to simplify, there is previous work that used frequency-based and/or context-based approaches to check if a term is unfamiliar to the average patient or if it has simpler synonyms [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Such work focuses on identifying difficult medical terms and treats these terms as equally important.</p>
          <p>Our approach is different in 2 aspects: (1) we focus on finding important medical terms, which are not equivalent to difficult medical terms, as discussed in the Background and Significance subsection; and (2) our approach is patient centered and prioritizes important terms for each EHR note of individual patients. We developed several learning features, including term frequency, term position, term frequency-inverse document frequency (TF-IDF), and topic feature, to serve this purpose.</p>
          <p>It is worth noting that our approach is complementary to previous work. For example, in a real-world application, we can display the lay definitions for all the difficult medical terms in a patient’s EHR note, and then highlight those terms that FOCUS predicts to be most important to this patient.</p>
        </sec>
        <sec>
          <title>Single-Document Keyphrase Extraction</title>
          <p>Our work is inspired by, but different from, single-document keyphrase extraction (KE), which identifies terms or phrases representing important concepts and topics in a document. KE targets topics that the writers wanted to convey when writing the documents. Unlike KE, our work does not focus on topics important to physicians (ie, the writers and the target readers when writing the EHR notes), but rather focuses on patients, the new readers of the notes.</p>
          <p>Both supervised and unsupervised methods have been developed for KE [<xref ref-type="bibr" rid="ref33">33</xref>]. We use supervised methods, which in general perform better than unsupervised ones when training data is available.</p>
          <p>Most supervised methods formulate KE as a binary classification problem. The confidence scores output by the classification algorithms are used to rank candidate phrases. Various algorithms have been explored, such as naïve Bayes, decision tree, bagging, support vector machine (SVM), multilayer perceptron, and random forest (RF) [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref43">43</xref>]. In our study, we implemented RF [<xref ref-type="bibr" rid="ref43">43</xref>] as a strong baseline system.</p>
          <p>KE in the biomedical domain mainly focused on literature articles and domain-specific methods and features [<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref47">47</xref>]. For example, Li et al [<xref ref-type="bibr" rid="ref44">44</xref>] developed a software tool called keyphrase identification program (KIP) to extract keyphrases from medical articles. KIP used Medical Subject Headings (MeSH) as the knowledge base to compute a score to reflect a phrase’s domain specificity. It assigned each candidate phrase a rank score by multiplying its within-document term frequency and domain-specificity score.</p>
          <p>Different from the aforementioned approaches, we treat KE as a ranking problem and use the ranking SVM (rankSVM) approach [<xref ref-type="bibr" rid="ref48">48</xref>] as it has been shown to be effective in KE in scientific literature, news, and weblogs [<xref ref-type="bibr" rid="ref42">42</xref>].</p>
          <p>Common learning features used by previous work include frequency-based features (eg, TF-IDF), term-related features (eg, the term itself, its position in a document, and its length), document structure-based features (eg, whether a term occurs in the title or abstract of a scientific paper), and syntactic features (eg, the part-of-speech [POS] tags). Features derived from external resources, such as Wikipedia and query logs, have also been used to represent term importance [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. Unlike previous work, we explored rich semantic features specifically available to the medical domain.</p>
          <p>Medelyan and Witten [<xref ref-type="bibr" rid="ref45">45</xref>] developed a system that extends the widely used keyphrase extraction algorithm KEA [<xref ref-type="bibr" rid="ref34">34</xref>] by using semantic information from domain-specific thesauri, which they called KEA++. KEA++ has been applied to the medical domain, where it used MeSH vocabulary to extract candidate phrases from medical articles and used MeSH concept relations to compute its domain-specific feature. In this study, we adapted KEA++ to the EHR data and used the adapted KEA++ as a strong baseline system.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>A FOCUS Corpus of Electronic Health Records With Expert-Annotated Important Concepts</title>
        <p>We created a FOCUS corpus, which is a collection of 90 representative EHR discharge summaries and progress notes from the University of Massachusetts Memorial Hospital outpatient clinics. To maximize the representativeness, we selected notes from patients with 6 different but common primary clinical diagnoses: cancer, chronic obstructive pulmonary disease, diabetes, heart failure, hypertension, and liver failure. We deidentified the notes and then asked physicians to identify, for each note, terms important to patients.</p>
        <p>We adopted the expert annotation approach for this study for the following reasons. First, annotating important medical terms requires full comprehension of an EHR note. Such level of comprehension may be beyond the capacity of average patients [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. Previous work shows that even lay people with higher education (ie, college or graduate degrees) have difficulty with comprehending EHR notes [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. Second, physicians have specific medical training for communicating with patients and understanding their needs. Physicians’ expertise would guide patients in understanding the most important aspects that are medically relevant to their health and well-being.</p>
        <p>We developed an annotation guideline (see <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>) to instruct physicians to identify at least 5 of the most important medical terms per EHR note, which the patients need to know in order to comprehend the note for the most important aspects medically relevant to their health and treatment course. For each note, we obtained annotations from 2 physicians and used the agreement from both physicians as the gold standard for our experiments. Three physicians did the annotation and annotated 48, 68, and 64 notes, respectively.</p>
      </sec>
      <sec>
        <title>FOCUS</title>
        <sec>
          <title>Overview</title>
          <p><xref ref-type="fig" rid="figure1">Figure 1</xref> shows the overview of FOCUS and its corpus and evaluation. In Step 2 of the approach, FOCUS first extracts candidate terms (Step 2.1) and then ranks them (Step 2.2). Since we focused on ranking in this study, we used MetaMap [<xref ref-type="bibr" rid="ref29">29</xref>], a widely used medical concept detection tool, to automatically identify candidate terms from each EHR note. We then applied rankSVM to rank the terms.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Overview of our approach: building the FOCUS corpus (Step 1), developing FOCUS (Step 2), and evaluation (Step 3). FOCUS: Finding impOrtant medical Concepts most Useful to patientS; EHR: electronic health record; rankSVM: ranking support vector machine.</p>
            </caption>
            <graphic xlink:href="medinform_v4i4e40_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Ranking Support Vector Machine</title>
          <p>RankSVM [<xref ref-type="bibr" rid="ref48">48</xref>] is a pairwise ranking method, which can learn to rank important terms in each EHR note as higher than nonimportant ones.</p>
          <p>Our training data for rankSVM contain the following: (1) a set <italic>E</italic> of EHR notes; (2) a list of candidate terms <italic>T</italic><sub>e</sub> associated with each EHR note <italic>e</italic>; and (3) for a term <italic>t</italic> ∊ <italic>T</italic><sub>e</sub>, a <italic>d</italic>-dimension feature vector <italic>x</italic><sub>t</sub> ∊ <italic>R</italic><sup>d</sup> and a binary target value (ie, label) <italic>y</italic><sub>t</sub> which denotes whether <italic>t</italic> is an important medical term in <italic>e</italic>. In our case, <italic>y</italic><sub>t</sub> is 1 if <italic>t</italic> is important in <italic>e</italic> and 0 if not. In the general framework of ranking, <italic>y</italic><sub>t</sub> corresponds to the ranking order of <italic>t</italic>, and the more important <italic>t</italic> is, the higher order and the larger value of <italic>y</italic><sub>t</sub> it has. Let <italic>P</italic> be the set of term pairs (<italic>i, j</italic>), where term <italic>i</italic> and term <italic>j</italic> occur in the same EHR note and term <italic>i</italic> is important (<italic>y</italic><sub>i</sub>=1) and term <italic>j</italic> is not important (<italic>y</italic><sub>j</sub>=0) (ie, <italic>P</italic>={ (<italic>i, j</italic>) &#124; <italic>y</italic><sub>i</sub> &#62; <italic>y</italic><sub>j</sub>}). The rankSVM model is built by minimizing the objective function [<xref ref-type="bibr" rid="ref48">48</xref>], as defined by equation 1 in <xref ref-type="fig" rid="figure2">Figure 2</xref>, where <italic>w</italic> is the feature weight vector; <italic>ε</italic><sub>i,j</sub> is the slack variable that measures the model’s soft-margin error for term pair (<italic>i, j</italic>); <italic>C</italic> is a tuning parameter; and <italic>m</italic> is the total number of term pairs in <italic>P</italic>.
The formulation in equation 1 in <xref ref-type="fig" rid="figure2">Figure 2</xref> finds a large-margin linear function that minimizes the number of pairs of training examples swapped with respect to their desired ranking order.</p>
          <p>We chose SVM<sup>rank</sup> [<xref ref-type="bibr" rid="ref49">49</xref>], which implements rankSVM in an efficient way by using a cutting-plane algorithm and learns from large sparse data in linear time.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Objective function used in training ranking support vector machine.</p>
            </caption>
            <graphic xlink:href="medinform_v4i4e40_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Baseline Features for Ranking</title>
          <p>We implemented 9 features commonly used for KE [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>].</p>
          <sec>
            <title>Frequency-Based Features</title>
            <p>The frequency-based features include term frequency, inverse document frequency, and TF-IDF. Term frequency is the number of occurrences of a candidate term in each individual EHR note. Inverse document frequency and TF-IDF are calculated in the standard way (see <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>). We used 6,237 clinical notes, which were selected by using the same 6 diagnoses used to select the 90 notes for the FOCUS corpus, to compute inverse document frequency.</p>
          </sec>
          <sec>
            <title>Term Structure-Based Features</title>
            <p>The term structure-based features include term length (TL) (ie, the total number of words contained in a term), the length of the longest word (by character) in a candidate term (maxWL), and a combined feature of TL and maxWL [<xref ref-type="bibr" rid="ref51">51</xref>], as defined in equation 2 in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p>
            <p>Since longer terms and words are less likely to be familiar to patients, these features may help distinguish between unfamiliar and common or familiar terms. Thus, these features may help rank as low EHR terms that are too common to be important (eg, <italic>blood</italic> and <italic>pain</italic>).</p>
            <fig id="figure3" position="float">
              <label>Figure 3</label>
              <caption>
                <p>Equation for defining a combined feature of TL and maxWL. TL: term length (ie, length of a candidate term by word); maxWL: length of the longest word (by character) in a candidate term.</p>
              </caption>
              <graphic xlink:href="medinform_v4i4e40_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Position Feature</title>
            <p>The position feature is the number of words preceding the first occurrence of a candidate term, normalized by the total number of words in the document. We used this feature because we found that the medical terms most specific to a patient often occur early in his/her EHR notes.</p>
          </sec>
          <sec>
            <title>Lexical Feature</title>
            <p>The lexical feature was found to be useful in domain-specific KE [<xref ref-type="bibr" rid="ref35">35</xref>]. In our experiments, we used Porter’s stemmer to normalize terms. Since EHR data is noisy, we empirically include a stemmed term only if it occurs at least 3 times in the training data to eliminate misspelled words.</p>
          </sec>
          <sec>
            <title>Part-of-Speech Feature</title>
            <p>We used the POS tag of the head word of each candidate term, as generated by the clinical Text Analysis and Knowledge Extraction System (cTAKES) [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
          </sec>
        </sec>
        <sec>
          <title>Additional Features for Ranking</title>
          <sec>
            <title>Distributed Word Representation (Word Embedding)</title>
            <p>Word embeddings are distributed vector representations of words learned from large unlabeled data. Words sharing similar semantics and context are expected to be close in their word vector space [<xref ref-type="bibr" rid="ref53">53</xref>].</p>
            <p>We include this feature because word embedding has emerged as a powerful technique for word representation. It has been shown to improve several biomedical and clinical NLP tasks, such as biomedical named entity recognition [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref55">55</xref>], protein-protein interaction detection [<xref ref-type="bibr" rid="ref56">56</xref>], biomedical event extraction [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>], adverse drug event detection [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>], ranking biomedical synonyms [<xref ref-type="bibr" rid="ref61">61</xref>], and disambiguating clinical abbreviations [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>].</p>
            <p>We trained a neural language model to learn word embeddings. Specifically, we used Word2Vec software to create the skip-gram word embeddings [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref64">64</xref>]. We trained Word2Vec using a combined text corpus (over 3G words) of English Wikipedia, articles from PubMed Open Access, and 99,735 EHR notes from the Pittsburgh corpus (Chapman W, University of Pittsburgh NLP Repository; using this data requires a license). We set the training parameters based on the study of Pyysalo et al [<xref ref-type="bibr" rid="ref65">65</xref>]. We represented multi-word terms with the mean of individual word vectors. In this work, we used 200-dimension word vectors, with each dimension normalized to (0,1).</p>
          </sec>
          <sec>
            <title>Unified Medical Language System Semantic Type</title>
            <p>We mapped the candidate terms to Unified Medical Language System (UMLS) semantic types by using MetaMap, and included these semantic types as learning features.</p>
          </sec>
          <sec>
            <title>Consumer Health Vocabulary Features</title>
            <p>We derived 7 binary features from the consumer health vocabulary (CHV) [<xref ref-type="bibr" rid="ref66">66</xref>]. The CHV is a collaborative resource and incorporates terms extracted from various consumer health sites, such as queries submitted to MedLinePlus and postings in health-focused online discussion forums [<xref ref-type="bibr" rid="ref67">67</xref>-<xref ref-type="bibr" rid="ref73">73</xref>]. The CHV contained 152,338 terms, most of which are consumer health terms [<xref ref-type="bibr" rid="ref71">71</xref>-<xref ref-type="bibr" rid="ref73">73</xref>]. Zeng et al [<xref ref-type="bibr" rid="ref72">72</xref>] mapped these consumer health terms to the UMLS concepts by a semiautomatic approach. As a result of this work, the CHV encompasses lay terms as well as corresponding medical jargon.</p>
            <p>In the FOCUS corpus, 89% of important terms are in the CHV, while a smaller percentage of nonimportant terms (76%) are in the CHV. This suggests that the presence of an EHR term in the CHV is indicative of the term’s importance from the perspective of patients (ie, health consumers). We therefore include a binary feature to denote whether a candidate term is in the CHV.</p>
            <p>In addition, we derived 6 binary features from CHV familiarity scores. For extended usability, the CHV assigns familiarity scores to 57.89% (88,189/152,338) of its terms. CHV familiarity scores estimate the likelihood that a medical term can be understood by an average reader [<xref ref-type="bibr" rid="ref74">74</xref>] and have values between 0 and 1, with 1 being most familiar and 0 being least familiar. CHV provides different types of familiarity scores [<xref ref-type="bibr" rid="ref30">30</xref>]. Following Zeng-Treitler et al [<xref ref-type="bibr" rid="ref30">30</xref>], we used the combined score and converted the continuous value into categorical features. Specifically, we divided the feature value range [0,1] into 5 equal-range bins, resulting in 5 binary features. The intuition behind these features is that medical terms with different levels of familiarity may be different in their importance to patients. For example, common terms (ie, terms that fall into the highest bin) such as <italic>disease</italic> and <italic>physicians</italic> are too general to be important. In addition, we included a sixth binary feature to indicate whether a candidate term has a CHV familiarity score.</p>
          </sec>
          <sec>
            <title>Topic Features</title>
            <p>Topic features are real-valued features in (0,1) to indicate the topic coherence between a candidate term and the EHR note containing this term. We compute topic features <italic>P(t&#124;e)</italic> by equations 3 and 4 in <xref ref-type="fig" rid="figure4">Figure 4</xref>, where <italic>P(t&#124;e)</italic> is the probability of a candidate term <italic>t</italic> conditioned on an EHR note <italic>e</italic>; <italic>P(w&#124;e)</italic> is the probability of a word <italic>w</italic> conditioned on <italic>e</italic>; <italic>P</italic> (<italic>w</italic> &#124; <italic>topic</italic><sub>i</sub>) and <italic>P</italic> (<italic>topic</italic><sub>i</sub> &#124; <italic>e</italic>) are word-topic and topic-EHR note distributions estimated by the topic model; and <italic>K</italic> is the number of topics used in topic modeling.</p>
            <p>We trained 3 latent Dirichlet allocation topic models with <italic>K</italic> set to 50, 100, and 200, respectively, after testing different <italic>K</italic>s on 6,237 clinical notes, which are the same as the notes used to compute IDF, using the MAchine Learning for LanguagE Toolkit (MALLET) [<xref ref-type="bibr" rid="ref75">75</xref>] with default parameters to obtain 3 topic features.</p>
            <fig id="figure4" position="float">
              <label>Figure 4</label>
              <caption>
                <p>Equations for defining topic feature.</p>
              </caption>
              <graphic xlink:href="medinform_v4i4e40_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Training and Evaluation Settings</title>
        <p>We created the training data from the FOCUS corpus as follows. We first applied MetaMap to the 90 notes in the FOCUS corpus. For each note, we took as positive examples those terms that were both identified by MetaMap and judged by physicians to be important to patients. We expanded the set of positive terms by using relaxed string match (details in the Evaluation Metrics subsection). The remaining terms identified by MetaMap were used as negative examples. This process resulted in a total of 690 positive and 21,809 negative terms from 90 notes.</p>
        <p>Note that our 690 positive terms are fewer than the 793 terms annotated by physicians. This is because MetaMap missed some terms, many of which are multi-word terms with embedded UMLS concepts (eg, <italic>autologous stem cell transplant</italic> and <italic>insulin-dependent diabetic</italic>). Although we did not use these terms for training and for 10-fold cross-validation, we included them as positive terms for our final evaluation (as described in the Evaluation Metrics subsection).</p>
        <p>We used the aforementioned training set for all the systems except 1 baseline system, adapted KEA++ (details in the Baseline Systems subsection), as it had its own procedure for extracting candidate terms and generating training data.</p>
        <p>Previous work has shown that approximately 50-100 documents are sufficient to train supervised KE systems in the biomedical domain [<xref ref-type="bibr" rid="ref45">45</xref>], suggesting that our 90 EHR notes, although few in number, may be sufficient. Our results empirically validated this hypothesis.</p>
      </sec>
      <sec>
        <title>Baseline Systems</title>
        <sec>
          <title>Adapted KEA++</title>
          <p>The keyphrase extraction algorithm KEA [<xref ref-type="bibr" rid="ref34">34</xref>] has been frequently used as a strong baseline in previous work [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. KEA++ [<xref ref-type="bibr" rid="ref45">45</xref>] is an extension of KEA with the added capacity for domain adaptation.</p>
          <p>KEA++ is based on naïve Bayes and uses the following 4 features: TF-IDF, term position, term length in words, and a knowledge-based feature node degree. The last feature computes the number of semantic links in a knowledge base that connect a candidate phrase to other phrases in the document. In addition, it supports preselection and filtering of candidate terms by using controlled vocabularies, which we adapted to the clinical vocabularies.</p>
          <p>Specifically, we included all the UMLS terms identified by MetaMap from the 90 FOCUS notes. We also included the complete list of medical terms from 3 comprehensive clinical vocabularies: MeSH, Systematized Nomenclature of Medicine (SNOMED), and the ninth revision of the International Classification of Diseases (ICD-9). To compute the node degree feature, we mapped terms in this controlled vocabulary to the UMLS concepts and incorporated concept relations (eg, <italic>Is-a</italic> and <italic>Part-of</italic>) from MeSH, SNOMED, and ICD-9.</p>
        </sec>
        <sec>
          <title>Random Forest</title>
          <p>RF [<xref ref-type="bibr" rid="ref76">76</xref>] is an ensemble learning method that combines multiple decision trees for classification or regression. RF extends the idea of <italic>bagging</italic> [<xref ref-type="bibr" rid="ref77">77</xref>] with a random selection of features [<xref ref-type="bibr" rid="ref78">78</xref>-<xref ref-type="bibr" rid="ref80">80</xref>] to improve robustness and generalizability. The RF classification method achieved state-of-the-art performance—outperforming KEA and kernel SVMs—in extracting keyphrases from scientific literature [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
          <p>We used the RF classification algorithm for our study. Assuming <italic>t</italic> is a candidate term from an EHR note <italic>e</italic>, the prediction of RF on (<italic>t, e</italic>), <italic>ƒ</italic>(<italic>t,e</italic>), is calculated by equation 5 in <xref ref-type="fig" rid="figure5">Figure 5</xref>, where <italic>ƒ</italic><sub><italic>k</italic></sub>(<italic>t,e</italic>) is the prediction on (<italic>t, e</italic>) (ie, the predicted possibility of <italic>t</italic> being an important medical term in <italic>e</italic>) by the <italic>k</italic>th decision tree among <italic>B</italic> decision trees built for RF (see more details below). According to equation 5 in <xref ref-type="fig" rid="figure5">Figure 5</xref>, <italic>ƒ</italic>(<italic>t,e</italic>) represents the averaged predicted possibility of <italic>t</italic> being an important medical term in <italic>e</italic> and, therefore, can be used to rank candidate terms in <italic>e</italic>.</p>
          <p>Each individual decision tree <italic>ƒ<sub>k</sub></italic> is built as follows: assuming the training set contains <italic>N</italic> labeled examples (ie, <italic>N</italic> pairs of <italic>t</italic> and <italic>e</italic>, labeled as 1 if <italic>t</italic> is important in <italic>e</italic> and 0 if not) represented by <italic>d</italic> features, a single tree is built on <italic>N</italic> examples randomly sampled with replacement from this training set. When growing the tree, at each node the algorithm searches a randomly selected subset of the <italic>d</italic> features and selects 1 feature to create an if-then-else decision rule to branch the tree (ie, splitting the training examples at this node based on their feature values for the selected feature). Common criteria for selecting the feature that best splits a node include Gini impurity and information gain. When a node contains examples from the same class or its impurity is below a threshold, splitting stops and the node becomes a leaf node.</p>
          <p>For a new example (<italic>t, e</italic>), RF assigns (<italic>t, e</italic>) to a leaf node of each individual decision tree by applying the decision rules learned from the training phase. The term <italic>ƒ<sub>k</sub></italic>(<italic>t,e</italic>) in equation 5 in <xref ref-type="fig" rid="figure5">Figure 5</xref> is calculated as the fraction of positive training examples in the leaf node of the <italic>k</italic>th decision tree where (<italic>t, e</italic>) is assigned.</p>
          <p>RF uses the same features as FOCUS. We used scikit-learn [<xref ref-type="bibr" rid="ref81">81</xref>] to develop RF. We set the parameter <italic>B</italic> by minimizing the out-of-bag error during training and used default values for other parameters.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Prediction function of random forest.</p>
            </caption>
            <graphic xlink:href="medinform_v4i4e40_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <sec>
          <title>Precision, Recall, and <italic>F</italic>-score at Rank n</title>
          <p>We report the averaged precision, recall, and <italic>F</italic>-score at ranks 5 and 10, abbreviated as P5, R5, and F5; and P10, R10, and F10, respectively. These metrics measure system performance for top ranks and are widely used to evaluate KE systems. We computed these metrics for the final evaluation (Step 3 in <xref ref-type="fig" rid="figure1">Figure 1</xref>) where we used all the gold-standard important terms as positive examples, including those that would never be included in the stage of candidate term extraction.</p>
        </sec>
        <sec>
          <title>Area Under the Receiver Operating Characteristic Curve</title>
          <p>Area under the receiver operating characteristic curve (AUC-ROC) is a metric widely used for evaluating ranking outputs. It computes the area under a receiver operating characteristic curve, which plots the true positive rate (y-coordinate) against the false positive rate (x-coordinate) at various threshold settings. To evaluate a system, we compute its AUC-ROC for each EHR note in the FOCUS corpus and report the averaged value. AUC-ROC measures the performance of the global ranking. Because both candidate term extraction and ranking affect the quality of global ranking, we report 2 AUC-ROC metrics: AUC-ROC<sub>ranking</sub> and AUC-ROC<sub>KE</sub>. AUC-ROC<sub>ranking</sub> is computed on the candidate terms extracted by a system. Therefore, if a gold-standard important term is missed in candidate term extraction, it will not affect the system’s AUC-ROC<sub>ranking</sub>. Since this metric is informative about the ranking performance of a system, we used it to evaluate the cross-validation results on ranking candidate terms (Step 2.2 in <xref ref-type="fig" rid="figure1">Figure 1</xref>). AUC-ROC<sub>KE</sub> is computed by using all the gold-standard important terms as positive examples and measures the combined performance of candidate term extraction and ranking (Step 3 in <xref ref-type="fig" rid="figure1">Figure 1</xref>).</p>
          <p>In the evaluation step, we use relaxed string match to determine true positives, as exact match is known to underestimate performance as perceived by human judges [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref82">82</xref>]. Specifically, we treat a term from the system output as a true positive if it either exactly matches or subsumes a gold-standard important term (eg, <italic>non-Hodgkin lymphoma</italic> subsumes <italic>lymphoma</italic>). We allow <italic>subsume</italic> but not <italic>part-of</italic> match in relaxed string match, as previous work found that the former aligned well with human judges but the latter did not [<xref ref-type="bibr" rid="ref82">82</xref>]. For example, a part of an important term may be too general to be important (eg, <italic>disease</italic> in <italic>Crohn's disease</italic> and <italic>iron</italic> in <italic>iron deficiency</italic>).</p>
        </sec>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>The paired samples <italic>t</italic> test was used for significance testing for the performance difference of 2 systems.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Statistics of FOCUS Corpus</title>
        <p>For each note, we treat the terms agreed on by 2 physicians as the gold-standard important terms. In total, the physicians identified 793 important medical terms from the 90 FOCUS notes (mean 9 [SD 5] terms per note). The Cohen’s kappa coefficient for annotation agreement (microaverage) is .51. <xref ref-type="table" rid="table1">Table 1</xref> summarizes the statistics of the FOCUS corpus.</p>
        <p>The important terms identified by the physicians cover a wide range of topics, as represented by the UMLS semantic types. <xref ref-type="table" rid="table2">Table 2</xref> shows term frequency and example terms for the 8 major topics.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Statistics of the FOCUS<sup>a</sup> corpus.</p>
          </caption>
          <table width="565" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="344"/>
            <col width="199"/>
            <thead>
              <tr valign="top">
                <td>Characteristics of the FOCUS corpus</td>
                <td><italic>N</italic> or mean (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Number of notes, <italic>N</italic></td>
                <td>90</td>
              </tr>
              <tr valign="top">
                <td>Number of words per EHR<sup>b</sup> note, mean (SD)</td>
                <td>816 (133)</td>
              </tr>
              <tr valign="top">
                <td>Number of candidate terms identified by MetaMap per EHR note, mean (SD)</td>
                <td>250 (42)</td>
              </tr>
              <tr valign="top">
                <td>Number of important medical terms identified by physicians per EHR note, mean (SD)</td>
                <td>9 (5)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>FOCUS: Finding impOrtant medical Concepts most Useful to patientS.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>EHR: electronic health record.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The 8 major topics in the FOCUS<sup>a</sup> corpus.</p>
          </caption>
          <table width="620" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="164"/>
            <col width="146"/>
            <col width="278"/>
            <thead>
              <tr valign="top">
                <td>UMLS<sup>b</sup> semantic type</td>
                <td>Number of important terms, <italic>n</italic></td>
                <td>Example terms</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Disease or syndrome</td>
                <td>295</td>
                <td>autoimmune hemolytic anemia, gastroesophageal reflux, pancytopenia, Sjogren's syndrome, osteoporosis</td>
              </tr>
              <tr valign="top">
                <td>Organic chemical</td>
                <td>88</td>
                <td>atenolol, vincristine, warfarin, Wellbutrin, Zocor</td>
              </tr>
              <tr valign="top">
                <td>Finding</td>
                <td>59</td>
                <td>alopecia, hematuria, hypertension, NSTEMI (non-ST-elevation myocardial infarction), retinopathy</td>
              </tr>
              <tr valign="top">
                <td>Neoplastic process</td>
                <td>35</td>
                <td>dermoid, large B cell lymphoma, pancreatic neoplasm, thyroid nodule</td>
              </tr>
              <tr valign="top">
                <td>Therapeutic or preventive procedure</td>
                <td>34</td>
                <td>chemotherapy, dialysis, immunosuppression, kidney transplantation, pancreatectomy</td>
              </tr>
              <tr valign="top">
                <td>Amino acid, peptide, or protein<sup>c</sup></td>
                <td>30</td>
                <td>basal insulin, Rituxan, Neupogen, Synthroid, hemoglobin A1C, HPL (human placental lactogen)</td>
              </tr>
              <tr valign="top">
                <td>Pathologic function</td>
                <td>25</td>
                <td>atrial fibrillation, autonomic dysfunction, BPH (benign prostatic hyperplasia), microscopic hematuria, systolic dysfunction</td>
              </tr>
              <tr valign="top">
                <td>Diagnostic procedure</td>
                <td>17</td>
                <td>thyroid ultrasound, echocardiogram, endoscopy, biopsy, cardiac catheterization</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>FOCUS: Finding impOrtant medical Concepts most Useful to patientS.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>UMLS: Unified Medical Language System.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>Electronic health record terms in this topic were split into 2 subtopics: medicine (denoted by their ingredients) and laboratory measure.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Most of the important terms annotated by physicians are specific to individual patients or notes. We used 2 criteria to select terms that may in general be important to patients: (1) the term occurs in more than 10% (9/90) of notes in the FOCUS corpus; and (2) the term was annotated as an important term for over 50% of the notes containing it. Only 4 terms qualified and were selected (the 2 numbers in parentheses following each term are the number of notes containing the term and the number of notes for which the term was annotated as important): <italic>coronary artery disease</italic> (20/14), <italic>osteoarthritis</italic> (19/10), <italic>anemia</italic> (13/7), and <italic>prednisone</italic> (10/6).</p>
        <p>In addition, we made several observations from the FOCUS corpus. First, physicians typically excluded highly domain-specific terms that are very difficult for patients to understand. For example, the terms describing surgical procedures in detail or the anatomical parts of organs were excluded. Second, physicians often selected diseases and other information that are of immediate concern to patients, thus excluding, for example, other comorbid diseases.</p>
      </sec>
      <sec>
        <title>Candidate Term Extraction</title>
        <p>On average, adapted KEA++ extracts 342 candidate terms per note from the FOCUS corpus, which match 86% of the gold-standard physician annotated terms; FOCUS (the same for RF) extracts 250 candidates per note, which match 89% of the gold-standard terms.</p>
      </sec>
      <sec>
        <title>Evaluation on FOCUS Corpus</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the evaluation results on the FOCUS corpus, where FOCUS achieves the best results and RF is the second best.</p>
        <p>The performance difference between FOCUS and adapted KEA++ is statistically significant for all the metrics (<italic>P</italic>&#60;.001). The difference between FOCUS and RF is also statistically significant for all the metrics (see <italic>P</italic> values in <xref ref-type="table" rid="table3">Table 3</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance of different natural language processing systems.</p>
          </caption>
          <table width="624" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="133"/>
            <col width="39"/>
            <col width="39"/>
            <col width="39"/>
            <col width="39"/>
            <col width="39"/>
            <col width="39"/>
            <col width="92"/>
            <col width="73"/>
            <thead>
              <tr valign="top">
                <td>System</td>
                <td>P5<sup>a</sup></td>
                <td>R5<sup>b</sup></td>
                <td>F5<sup>c</sup></td>
                <td>P10<sup>d</sup></td>
                <td>R10<sup>e</sup></td>
                <td>F10<sup>f</sup></td>
                <td>AUC-ROC<sub>ranking</sub><sup>g</sup></td>
                <td>AUC-ROC<sub>KE</sub><sup>h</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Adapted KEA++<sup>i</sup></td>
                <td>0.333</td>
                <td>0.211</td>
                <td>0.239</td>
                <td>0.281</td>
                <td>0.362</td>
                <td>0.292</td>
                <td>0.890</td>
                <td>0.780</td>
              </tr>
              <tr valign="top">
                <td>RF<sup>j</sup></td>
                <td>0.409</td>
                <td>0.267</td>
                <td>0.299</td>
                <td>0.339</td>
                <td>0.416</td>
                <td>0.346</td>
                <td>0.891</td>
                <td>0.821</td>
              </tr>
              <tr valign="top">
                <td>FOCUS<sup>k</sup></td>
                <td>0.462</td>
                <td>0.305</td>
                <td>0.341</td>
                <td>0.369</td>
                <td>0.464</td>
                <td>0.381</td>
                <td>0.940</td>
                <td>0.866</td>
              </tr>
              <tr valign="top">
                <td><italic>P</italic> (FOCUS vs RF)</td>
                <td>.01</td>
                <td>.01</td>
                <td>.01</td>
                <td>.045</td>
                <td>.03</td>
                <td>.02</td>
                <td>&#60;.001</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>P5: precision at rank 5.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>R5: recall at rank 5.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>F5: <italic>F</italic>-score at rank 5.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>P10: precision at rank 10.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>R10: recall at rank 10.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>F10: <italic>F</italic>-score at rank 10.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>AUC-ROC<sub>ranking</sub>: area under the receiver operating characteristic curve computed on the candidate terms extracted by a system.</p>
            </fn>
            <fn id="table3fn8">
              <p><sup>h</sup>AUC-ROC<sub>KE</sub>: area under the receiver operating characteristic curve (KE: keyphrase extraction) computed by using all the gold-standard important terms as positive examples.</p>
            </fn>
            <fn id="table3fn9">
              <p><sup>i</sup>KEA++: extension of the keyphrase extraction algorithm KEA.</p>
            </fn>
            <fn id="table3fn10">
              <p><sup>j</sup>RF: random forest.</p>
            </fn>
            <fn id="table3fn11">
              <p><sup>k</sup>FOCUS: Finding impOrtant medical Concepts most Useful to patientS.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <boxed-text id="box2" position="float">
          <title>Top-10 terms identified by different natural language processing systems for the full note containing the electronic health record excerpt in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>. True positives are italicized.</title><p>Adapted KEA++: <italic>Crohn disease</italic>, cirrhosis, <italic>metformin</italic>, recent, iron deficiency, <italic>thrombocytosis</italic>, Crohn, <italic>diabetes mellitus</italic>, anemia, omeprazole</p><p>RF (random forest): cirrhosis, iron deficiency anemia, iron deficiency, <italic>thrombocytosis</italic>, fenofibrate, alcohol, cheilosis, <italic>Crohn disease</italic>, <italic>myeloproliferative neoplasms, metformin</italic></p><p>FOCUS (Finding impOrtant medical Concepts most Useful to patientS): <italic>thrombocytosis, diabetes mellitus,</italic> cirrhosis, <italic>diabetes, metformin,</italic> omeprazole, iron deficiency anemia, fenofibrate, <italic>Crohn disease, budesonide</italic></p>
        </boxed-text>
        <p><xref ref-type="boxed-text" rid="box2">Textbox 2</xref> shows the top-10 terms identified by each of the 3 systems for the full note containing the EHR excerpt in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> (where true positives are italicized). The AUC-ROC<sub>KE</sub> scores achieved by the 3 systems on the full note are 0.868 (FOCUS), 0.809 (adapted KEA++), and 0.857 (RF).</p>
      </sec>
      <sec>
      <title>Effects of Additional Features</title>
      <p>We tested the effects of the additional features on FOCUS and RF. The results (see <xref ref-type="table" rid="table4">Table 4</xref>) show that the additional features improve the performances of both FOCUS and RF substantially (FOCUS vs FOCUS-base and RF vs RF-base). The difference is statistically significant for all the metrics except R10 between RF and RF-base.</p>
      <p>We further tested the effect of each additional feature by adding it to FOCUS-base. The results (see Table A3-1 in <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>) show that each additional feature improves the baseline features to a certain degree.</p>
      <p>We then tested FOCUS’s performance by using only additional features. The results (see Table A3-2 in <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>) show that word embedding is the best single feature, but still performs significantly worse than using all additional features for all the metrics (see row 5 in Table A3-2 in <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref> for <italic>P</italic> values). In addition, using only additional features performs significantly worse than using all features for all the metrics (<italic>P</italic>&#60;.001).</p>  
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>Performance of natural language processing systems with and without the additional features.</p>
        </caption>
        <table width="594" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="108"/>
          <col width="40"/>
          <col width="40"/>
          <col width="40"/>
          <col width="40"/>
          <col width="40"/>
          <col width="35"/>
          <col width="85"/>
          <col width="73"/>
          <thead>
            <tr valign="top">
              <td>System</td>
              <td>P5<sup>a</sup></td>
              <td>R5<sup>b</sup></td>
              <td>F5<sup>c</sup></td>
              <td>P10<sup>d</sup></td>
              <td>R10<sup>e</sup></td>
              <td>F10<sup>f</sup></td>
              <td>AUC-ROC<sub>ranking</sub><sup>g</sup></td>
              <td>AUC-ROC<sub>KE</sub><sup>h</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>FOCUS-base<sup>i</sup></td>
              <td>0.413</td>
              <td>0.256</td>
              <td>0.295</td>
              <td>0.331</td>
              <td>0.401</td>
              <td>0.337</td>
              <td>0.911</td>
              <td>0.840</td>
            </tr>
            <tr valign="top">
              <td>FOCUS<sup>j</sup></td>
              <td>0.462</td>
              <td>0.305</td>
              <td>0.341</td>
              <td>0.369</td>
              <td>0.464</td>
              <td>0.381</td>
              <td>0.940</td>
              <td>0.866</td>
            </tr>
            <tr valign="top">
              <td><italic>P</italic> (FOCUS vs FOCUS-base)</td>
              <td>.03</td>
              <td>.02</td>
              <td>.02</td>
              <td>.003</td>
              <td>&#60;.001</td>
              <td>.001</td>
              <td>&#60;.001</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>RF-base<sup>k</sup></td>
              <td>0.349</td>
              <td>0.219</td>
              <td>0.251</td>
              <td>0.303</td>
              <td>0.381</td>
              <td>0.315</td>
              <td>0.848</td>
              <td>0.781</td>
            </tr>
            <tr valign="top">
              <td>RF<sup>l</sup></td>
              <td>0.409</td>
              <td>0.267</td>
              <td>0.299</td>
              <td>0.339</td>
              <td>0.416</td>
              <td>0.346</td>
              <td>0.891</td>
              <td>0.821</td>
            </tr>
            <tr valign="top">
              <td><italic>P</italic> (RF vs RF-base)</td>
              <td>.003</td>
              <td>.01</td>
              <td>.01</td>
              <td>.01</td>
              <td>.10</td>
              <td>.046</td>
              <td>&#60;.001</td>
              <td>&#60;.001</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table4fn1">
            <p><sup>a</sup>P5: precision at rank 5.</p>
          </fn>
          <fn id="table4fn2">
            <p><sup>b</sup>R5: recall at rank 5.</p>
          </fn>
          <fn id="table4fn3">
            <p><sup>c</sup>F5: <italic>F</italic>-score at rank 5.</p>
          </fn>
          <fn id="table4fn4">
            <p><sup>d</sup>P10: precision at rank 10.</p>
          </fn>
          <fn id="table4fn5">
            <p><sup>e</sup>R10: recall at rank 10.</p>
          </fn>
          <fn id="table4fn6">
            <p><sup>f</sup>F10: <italic>F</italic>-score at rank 10.</p>
          </fn>
          <fn id="table4fn7">
            <p><sup>g</sup>AUC-ROC<sub>ranking</sub>: area under the receiver operating characteristic curve computed on the candidate terms extracted by a system.</p>
          </fn>
          <fn id="table4fn8">
            <p><sup>h</sup>AUC-ROC<sub>KE</sub>: area under the receiver operating characteristic curve (KE: keyphrase extraction) computed by using all the gold-standard important terms as positive examples.</p>
          </fn>
          <fn id="table4fn9">
            <p><sup>i</sup>FOCUS-base: Finding impOrtant medical Concepts most Useful to patientS; uses only the baseline features.</p>
          </fn>
          <fn id="table4fn10">
            <p><sup>j</sup>FOCUS: Finding impOrtant medical Concepts most Useful to patientS; uses the baseline features plus the additional features.</p>
          </fn>
          <fn id="table4fn11">
            <p><sup>k</sup>RF-base: random forest; uses only the baseline features.</p>
          </fn>
          <fn id="table4fn12">
            <p><sup>l</sup>RF: random forest; uses the baseline features plus the additional features.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap></sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We have shown that physicians were able to identify important terms from EHR notes with moderate agreement (Cohen’s kappa .51). This level of annotation agreement is acceptable for keyphrase annotation tasks [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref83">83</xref>]. We used the physicians’ agreement to obtain high-quality data to develop and evaluate systems that automated this task.</p>
        <p>Automated identification of EHR terms important to patients is challenging for several reasons. First, although frequency-based statistics such as term frequency and TF-IDF are widely used to estimate the importance of a term for a document, they are less effective for EHRs. For example, in our data, 56% of important medical terms occur only once in any individual EHR note. Second, we cannot infer the importance of a medical term solely based on its unfamiliarity level, as introduced in the Background and Significance subsection. Third, physicians’ annotations cannot be represented by simple patterns. One reason is that most patients in our data have comorbidity and the important terms identified by physicians are usually related to only some of their diseases. In addition, the important terms are spread over a wide range of topics—details in the Statistics of FOCUS Corpus subsection—and thus cannot be inferred by manual categorical rules. Fourth, EHR notes contain abundant medical terms, among which only a small portion (4% in our case) were annotated as positive or important. Such imbalanced data pose extra challenges for supervised learning.</p>
        <p>Despite the above challenges, our FOCUS system achieves a decent AUC-ROC<sub>KE</sub> of 0.866, suggesting that the learning-to-rank model with rich features is effective.</p>
      </sec>
      <sec>
        <title>FOCUS Versus Adapted KEA++ and Random Forest</title>
        <p>Our experiments show that FOCUS outperformed both adapted KEA++ and RF.</p>
        <p>Using a more sophisticated MetaMap system, FOCUS is more effective than adapted KEA++ in candidate term extraction, as reported in the Candidate Term Extraction subsection. MetaMap is a state-of-the-art lexical tool that is well-configured—using morphological analysis and nonexact string match—to detect medical concepts and their corresponding medical terms from text, while adapted KEA++ uses a simpler approach (ie, dictionary look-up of stemmed <italic>n</italic>-grams from text).</p>
        <p>We further compared FOCUS and adapted KEA++ on 28 FOCUS notes for which the 2 systems have the same recall on candidate extraction. FOCUS outperforms adapted KEA++ on this subset in all the evaluation measures, with significant improvements in particular on AUC-ROC<sub>ranking</sub> (0.936 vs 0.903, <italic>P</italic>=.03) and AUC-ROC<sub>KE</sub> (0.875 vs 0.844, <italic>P</italic>=.03). This indicates that the rich features and the rankSVM algorithm contribute to FOCUS’s performance gains.</p>
        <p>Despite using the same MetaMap extractor and features, FOCUS still shows an advantage, outperforming RF in all the evaluation measures. The performance difference demonstrates that the ranking-based approach outperforms the state-of-the-art classification-based approach (RF) for this task. We attribute FOCUS’s advantage over RF to the rankSVM algorithm used by FOCUS. Specifically, rankSVM sets its parameters by minimizing the number of swapped pairs during its model training, which is equivalent to maximizing the rank quality as measured by Kendall’s tau coefficient. In contrast, the RF algorithm is based on decision trees. The rules guiding the construction of decision trees (eg, information gain) do not directly optimize rank quality.</p>
        <p>We further analyzed the top-10 terms identified by the 3 systems. FOCUS, RF, and adapted KEA++ respectively ranked 433, 417, and 379 unique terms in their top-10 lists—since we have 90 notes, the maximum number of unique terms is 900. This result indicates that all 3 systems output diversified top-ranked terms, which are not constrained by a small set of terms, with FOCUS’s output being the most diversified. We then identified terms frequently ranked as high (in the top 10) by each system using 2 criteria: (1) the term was identified as a candidate term for more than 10% (9/90) of the notes; and (2) the term was ranked in the top 10 over 60% of the time. The analysis results (see Table A4-1 in <xref ref-type="app" rid="app4">Multimedia Appendix 4</xref>) show that FOCUS and RF, RF and adapted KEA++, and FOCUS and adapted KEA++ share 6, 4, and 3 terms in their frequently ranked-as-high terms, respectively. Only 2 terms—<italic>hypothyroidism</italic> and <italic>chemotherapy</italic>—are frequently ranked as high by all 3 systems.</p>
      </sec>
      <sec>
        <title>Effects of Additional Features</title>
        <p>Our additional features, when applied jointly, improved both FOCUS and RF (see <xref ref-type="table" rid="table4">Table 4</xref>). As FOCUS and RF adopt different learning schemes—ranking versus classification—these results suggest that the beneficial effect of our additional features is generalizable to different learning methods.</p>
        <p>Among the additional features, word embedding improves the AUC-ROC scores most—these scores measure the quality of the global ranking (see row 2 in Table A3-1 in <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>). This feature has been successfully applied to other biomedical and clinical NLP tasks. To the best of our knowledge, our work is the first to apply word embedding to ranking important terms in EHRs and show its usefulness.</p>
        <p>The UMLS semantic type is the best in boosting performance at top ranks (rank=5 and rank=10, row 3 in Table A3-1 in <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>), suggesting its importance. One reason why it is useful is that medical terms with certain semantic types such as <italic>medical device</italic> and <italic>anatomical structure</italic> were almost never annotated by physicians as being important to patients. This feature, therefore, can help rank those terms lower to improve quality of top ranks.</p>
        <p>Although the 3 topic features only improve the baseline features slightly, further analysis shows that they, when combined with other features, improve the performance. In particular, the FOCUS system using complete features significantly outperformed the one not using the topic features on AUC-ROC (<italic>P</italic>=.03 for both AUC-ROC<sub>ranking</sub> and AUC-ROC<sub>KE</sub>).</p>
        <p>The FOCUS systems that respectively use only all additional features and only word embedding achieved adequate results, especially on AUC-ROC scores (see Table A3-2 in <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>). However, they still performed worse than the system using all features, especially at top ranks.</p>
      </sec>
      <sec>
        <title>Error Analysis and Future Work</title>
        <p>We manually examined 17 notes, for which FOCUS has either zero recall at rank 5 or low AUC-ROC<sub>KE</sub> (&#60;0.800). We identified 3 error patterns.</p>
        <p>First, we used relaxed string match for evaluation but did not allow <italic>part-of</italic> match, for the reason discussed in the Evaluation Metrics subsection. However, in some cases, this approach underestimates the performance. For example, FOCUS counted it as a mistake if MetaMap recognized <italic>stem cell transplant</italic> but not <italic>autologous stem cell transplant</italic>, the gold-standard term.</p>
        <p>Second, FOCUS depends on MetaMap, which makes mistakes. It failed to identify certain abbreviations as medical terms (eg, <italic>A1c</italic> [a lab test for blood glucose], <italic>BMD</italic> [a lab test for bone mineral density], <italic>CPPD</italic> [calcium pyrophosphate deposition disease], and <italic>TSH</italic> [a lab test for thyroid stimulating hormone]). In future work, we may collect a list of common clinical abbreviations by mining a large EHR corpus and use this list to enhance medical term identification.</p>
        <p>Third, some errors are due to data sparsity. Although word embedding helps overcome data sparsity, FOCUS failed to rank as high some infrequent medical terms, such as <italic>femoral popliteal bypass</italic> and <italic>pseudogout</italic>. In future work, we will explore advanced approaches to deal with out-of-vocabulary words.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Due to the common bottleneck of creating an expert-annotated resource, we only annotated 90 EHR notes for the reference standard and training data. Although this is not a large dataset, our system FOCUS achieves an impressive AUC-ROC<sub>ranking</sub> of 0.940 in 10-fold cross-validation on this data, suggesting that the data size may be sufficient.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We have presented a new clinical NLP task—identifying medical terms important to patients from EHRs. We developed FOCUS, a learning-based NLP system that is based on an SVM learning-to-rank algorithm and rich learning features. The evaluation done on 90 physician-annotated EHR notes showed that FOCUS significantly outperformed other state-of-the-art NLP systems and that the additional features we developed were beneficial in boosting its performance.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>Guidelines for annotating medical terms important to patients in electronic health record notes.</p>
        <media xlink:href="medinform_v4i4e40_app1.pdf" xlink:title="PDF File (Adobe PDF File), 454KB"/>
      </app>
      <app id="app2">
        <title>Multimedia Appendix 2</title>
        <p>Formulas for calculating frequency-based features.</p>
        <media xlink:href="medinform_v4i4e40_app2.pdf" xlink:title="PDF File (Adobe PDF File), 543KB"/>
      </app>
      <app id="app3">
        <title>Multimedia Appendix 3</title>
        <p>Effects of additional features on FOCUS’s ranking performance. FOCUS: Finding impOrtant medical Concepts most Useful to patientS.</p>
        <media xlink:href="medinform_v4i4e40_app3.pdf" xlink:title="PDF File (Adobe PDF File), 685KB"/>
      </app>
      <app id="app4">
        <title>Multimedia Appendix 4</title>
        <p>Medical terms frequently ranked as high by different natural language processing systems.</p>
        <media xlink:href="medinform_v4i4e40_app4.pdf" xlink:title="PDF File (Adobe PDF File), 450KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC-ROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BMD</term>
          <def>
            <p>bone mineral density</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CBC</term>
          <def>
            <p>complete blood count</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CHV</term>
          <def>
            <p>consumer health vocabulary</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CPPD</term>
          <def>
            <p>calcium pyrophosphate deposition disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">cTAKES</term>
          <def>
            <p>clinical Text Analysis and Knowledge Extraction System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">F5</term>
          <def>
            <p><italic>F</italic>-score at rank 5</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">F10</term>
          <def>
            <p><italic>F</italic>-score at rank 10</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">FOCUS</term>
          <def>
            <p>Finding impOrtant medical Concepts most Useful to patientS</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">ICD-9</term>
          <def>
            <p>ninth revision of the International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">KE</term>
          <def>
            <p>keyphrase extraction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">KEA</term>
          <def>
            <p>keyphrase extraction algorithm</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">KIP</term>
          <def>
            <p>keyphrase identification program</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">MALLET</term>
          <def>
            <p>MAchine Learning for LanguagE Toolkit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">maxWL</term>
          <def>
            <p>length of the longest word (by character) in a candidate term</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">MeSH</term>
          <def>
            <p>Medical Subject Headings</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">P5</term>
          <def>
            <p>precision at rank 5</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">P10</term>
          <def>
            <p>precision at rank 10</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">POS</term>
          <def>
            <p>part of speech</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb22">R5</term>
          <def>
            <p>recall at rank 5</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb23">R10</term>
          <def>
            <p>recall at rank 10</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb24">rankSVM</term>
          <def>
            <p>ranking support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb25">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb26">SNOMED</term>
          <def>
            <p>Systematized Nomenclature of Medicine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb27">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb28">TF-IDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb29">TL</term>
          <def>
            <p>term length</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb30">TSH</term>
          <def>
            <p>thyroid stimulating hormone</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb31">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the Investigator Initiated Research (1I01HX001457-01) from the Health Services Research and Development Program of the United States Department of Veterans Affairs. The content is solely the responsibility of the authors and does not represent the views of the United States Department of Veterans Affairs or the United States Government.</p>
      <p>We thank the UMassMed annotation team, including Elaine Freund, Victoria Wang, Andrew Hsu, Barinder Hansra, and Sonali Harchandani, for creating the FOCUS corpus, and we thank Weisong Liu for technical support in collecting EHR notes. We also thank the anonymous reviewers for their constructive comments and suggestions.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
      <fn fn-type="con">
        <p>HY and JC designed the study and led the effort to develop the annotation guideline. JC and JZ collected the data. JC designed and developed the FOCUS system, conducted the experiments, and drafted the manuscript. All authors have made significant contributions to data analysis and paper revision. HY is responsible for answering queries about the FOCUS corpus.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">

        <article-title>Vol Title XIII of Division A and Title IV of Division B of the American Recovery and Reinvestment Act of 2009</article-title>
        <source>Washington, DC: Office of the National Coordinator for Health Information; 2009 Feb 18. Health Information Technology for Economic and Clinical Health Act (HITECH Act)</source>  
        <comment>
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.healthit.gov/sites/default/files/hitech_act_excerpt_from_arra_with_index.pdf">https://www.healthit.gov/sites/default/files/hitech_act_excerpt_from_arra_with_index.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6m5sIHphk"/></comment> </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Steinbrook</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Health care and the American Recovery and Reinvestment Act</article-title>
        <source>N Engl J Med</source>  
        <year>2009</year>  
        <month>03</month>  
        <day>12</day>  
        <volume>360</volume>  
        <issue>11</issue>  
        <fpage>1057</fpage>  
        <lpage>1060</lpage>  
        <pub-id pub-id-type="doi">10.1056/NEJMp0900665</pub-id>
        <pub-id pub-id-type="medline">19224738</pub-id>
        <pub-id pub-id-type="pii">NEJMp0900665</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wright</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Feblowitz</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Samal</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>McCoy</surname>
            <given-names>AB</given-names>
          </name>
          <name name-style="western">
            <surname>Sittig</surname>
            <given-names>DF</given-names>
          </name>
        </person-group>
        <article-title>The Medicare Electronic Health Record Incentive Program: Provider performance on core and menu measures</article-title>
        <source>Health Serv Res</source>  
        <year>2014</year>  
        <month>02</month>  
        <volume>49</volume>  
        <issue>1 Pt 2</issue>  
        <fpage>325</fpage>  
        <lpage>346</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24359554"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1111/1475-6773.12134</pub-id>
        <pub-id pub-id-type="medline">24359554</pub-id>
        <pub-id pub-id-type="pmcid">PMC3925405</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Irizarry</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>DeVito</surname>
            <given-names>DA</given-names>
          </name>
          <name name-style="western">
            <surname>Curran</surname>
            <given-names>CR</given-names>
          </name>
        </person-group>
        <article-title>Patient portals and patient engagement: A state of the science review</article-title>
        <source>J Med Internet Res</source>  
        <year>2015</year>  
        <volume>17</volume>  
        <issue>6</issue>  
        <fpage>e148</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2015/6/e148/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.4255</pub-id>
        <pub-id pub-id-type="medline">26104044</pub-id>
        <pub-id pub-id-type="pii">v17i6e148</pub-id>
        <pub-id pub-id-type="pmcid">PMC4526960</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
<nlm-citation citation-type="journal"> <person-group person-group-type="author"> <name name-style="western"> <surname>Delbanco</surname> <given-names>Tom</given-names> </name> <name name-style="western"> <surname>Walker</surname> <given-names>Jan</given-names> </name> <name name-style="western"> <surname>Darer</surname> <given-names>Jonathan D</given-names> </name> <name name-style="western"> <surname>Elmore</surname> <given-names>Joann G</given-names> </name> <name name-style="western"> <surname>Feldman</surname> <given-names>Henry J</given-names> </name> <name name-style="western"> <surname>Leveille</surname> <given-names>Suzanne G</given-names> </name> <name name-style="western"> <surname>Ralston</surname> <given-names>James D</given-names> </name> <name name-style="western"> <surname>Ross</surname> <given-names>Stephen E</given-names> </name> <name name-style="western"> <surname>Vodicka</surname> <given-names>Elisabeth</given-names> </name> <name name-style="western"> <surname>Weber</surname> <given-names>Valerie D</given-names> </name> </person-group> <article-title>Open notes: doctors and patients signing on</article-title> <source>Ann Intern Med</source> <year>2010</year> <month>07</month> <day>20</day> <volume>153</volume> <issue>2</issue> <fpage>121</fpage> <lpage>5</lpage> <pub-id pub-id-type="doi">10.7326/0003-4819-153-2-201007200-00008</pub-id> <pub-id pub-id-type="medline">20643992</pub-id> <pub-id pub-id-type="pii">153/2/121</pub-id> </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
        <source>HealthIT.gov</source>  
        <access-date>2016-11-17</access-date>
        <comment>About the Blue Button movement. 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.healthit.gov/patients-families/about-blue-button-movement">https://www.healthit.gov/patients-families/about-blue-button-movement</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6m5tZNgI8"/></comment> </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Delbanco</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Walker</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Bell</surname>
            <given-names>SK</given-names>
          </name>
          <name name-style="western">
            <surname>Darer</surname>
            <given-names>JD</given-names>
          </name>
          <name name-style="western">
            <surname>Elmore</surname>
            <given-names>JG</given-names>
          </name>
          <name name-style="western">
            <surname>Farag</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Feldman</surname>
            <given-names>HJ</given-names>
          </name>
          <name name-style="western">
            <surname>Mejilla</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Ngo</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Ralston</surname>
            <given-names>JD</given-names>
          </name>
          <name name-style="western">
            <surname>Ross</surname>
            <given-names>SE</given-names>
          </name>
          <name name-style="western">
            <surname>Trivedi</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Vodicka</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Leveille</surname>
            <given-names>SG</given-names>
          </name>
        </person-group>
        <article-title>Inviting patients to read their doctors' notes: A quasi-experimental study and a look ahead</article-title>
        <source>Ann Intern Med</source>  
        <year>2012</year>  
        <month>10</month>  
        <day>2</day>  
        <volume>157</volume>  
        <issue>7</issue>  
        <fpage>461</fpage>  
        <lpage>470</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23027317"/>
        </comment>  
        <pub-id pub-id-type="doi">10.7326/0003-4819-157-7-201210020-00002</pub-id>
        <pub-id pub-id-type="medline">23027317</pub-id>
        <pub-id pub-id-type="pii">1363511</pub-id>
        <pub-id pub-id-type="pmcid">PMC3908866</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nazi</surname>
            <given-names>KM</given-names>
          </name>
          <name name-style="western">
            <surname>Hogan</surname>
            <given-names>TP</given-names>
          </name>
          <name name-style="western">
            <surname>McInnes</surname>
            <given-names>DK</given-names>
          </name>
          <name name-style="western">
            <surname>Woods</surname>
            <given-names>SS</given-names>
          </name>
          <name name-style="western">
            <surname>Graham</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Evaluating patient access to electronic health records: Results from a survey of veterans</article-title>
        <source>Med Care</source>  
        <year>2013</year>  
        <month>03</month>  
        <volume>51</volume>  
        <issue>3 Suppl 1</issue>  
        <fpage>S52</fpage>  
        <lpage>S56</lpage>  
        <pub-id pub-id-type="doi">10.1097/MLR.0b013e31827808db</pub-id>
        <pub-id pub-id-type="medline">23407012</pub-id>
        <pub-id pub-id-type="pii">00005650-201303001-00011</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Woods</surname>
            <given-names>SS</given-names>
          </name>
          <name name-style="western">
            <surname>Schwartz</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Tuepker</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Press</surname>
            <given-names>NA</given-names>
          </name>
          <name name-style="western">
            <surname>Nazi</surname>
            <given-names>KM</given-names>
          </name>
          <name name-style="western">
            <surname>Turvey</surname>
            <given-names>CL</given-names>
          </name>
          <name name-style="western">
            <surname>Nichol</surname>
            <given-names>WP</given-names>
          </name>
        </person-group>
        <article-title>Patient experiences with full electronic access to health records and clinical notes through the My HealtheVet Personal Health Record Pilot: Qualitative study</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <volume>15</volume>  
        <issue>3</issue>  
        <fpage>e65</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2013/3/e65"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.2356</pub-id>
        <pub-id pub-id-type="medline">23535584</pub-id>
        <pub-id pub-id-type="pii">v15i3e65</pub-id>
        <pub-id pub-id-type="pmcid">PMC3636169</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Goryachev</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Keselman</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Slaughter</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>CA</given-names>
          </name>
        </person-group>
        <article-title>Text characteristics of clinical reports and their implications for the readability of personal health records</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2007</year>  
        <volume>129</volume>  
        <issue>Pt 2</issue>  
        <fpage>1117</fpage>  
        <lpage>1121</lpage>  
        <pub-id pub-id-type="medline">17911889</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kandula</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Curtis</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>A semantic and syntactic text simplification tool for health content</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2010</year>  
        <volume>2010</volume>  
        <fpage>366</fpage>  
        <lpage>370</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21347002"/>
        </comment>  
        <pub-id pub-id-type="medline">21347002</pub-id>
        <pub-id pub-id-type="pmcid">PMC3041424</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Polepalli Ramesh</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Houston</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Brandt</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Fang</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Improving patients' electronic health record comprehension with NoteAid</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2013</year>  
        <volume>192</volume>  
        <fpage>714</fpage>  
        <lpage>718</lpage>  
        <pub-id pub-id-type="medline">23920650</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sarzynski</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Hashmi</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Subramanian</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Fitzpatrick</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Polverento</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Simmons</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Brooks</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Given</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Opportunities to improve clinical summaries for patients at hospital discharge</article-title>
        <source>BMJ Qual Saf</source>  
        <year>2016</year>  
        <month>05</month>  
        <day>6</day>  
        <pub-id pub-id-type="doi">10.1136/bmjqs-2015-005201</pub-id>
        <pub-id pub-id-type="medline">27154878</pub-id>
        <pub-id pub-id-type="pii">bmjqs-2015-005201</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Doak</surname>
            <given-names>CC</given-names>
          </name>
          <name name-style="western">
            <surname>Doak</surname>
            <given-names>LG</given-names>
          </name>
          <name name-style="western">
            <surname>Root</surname>
            <given-names>JH</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Morton</surname>
            <given-names>PG</given-names>
          </name>
        </person-group>
        <source>Teaching Patients With Low Literacy Skills. 2nd edition</source>  
        <year>1996</year>  
        <publisher-loc>Philadelphia, PA</publisher-loc>
        <publisher-name>JB Lippincott Company</publisher-name></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Doak</surname>
            <given-names>CC</given-names>
          </name>
          <name name-style="western">
            <surname>Doak</surname>
            <given-names>LG</given-names>
          </name>
          <name name-style="western">
            <surname>Friedell</surname>
            <given-names>GH</given-names>
          </name>
          <name name-style="western">
            <surname>Meade</surname>
            <given-names>CD</given-names>
          </name>
        </person-group>
        <article-title>Improving comprehension for cancer patients with low literacy skills: Strategies for clinicians</article-title>
        <source>CA Cancer J Clin</source>  
        <year>1998</year>  
        <volume>48</volume>  
        <issue>3</issue>  
        <fpage>151</fpage>  
        <lpage>162</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://onlinelibrary.wiley.com/resolve/openurl?genre=article&#38;sid=nlm:pubmed&#38;issn=0007-9235&#38;date=1998&#38;volume=48&#38;issue=3&#38;spage=151"/>
        </comment>  
        <pub-id pub-id-type="medline">9594918</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Walsh</surname>
            <given-names>TM</given-names>
          </name>
          <name name-style="western">
            <surname>Volsko</surname>
            <given-names>TA</given-names>
          </name>
        </person-group>
        <article-title>Readability assessment of Internet-based consumer health information</article-title>
        <source>Respir Care</source>  
        <year>2008</year>  
        <month>10</month>  
        <volume>53</volume>  
        <issue>10</issue>  
        <fpage>1310</fpage>  
        <lpage>1315</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.rcjournal.com/contents/10.08/10.08.1310.pdf"/>
        </comment>  
        <pub-id pub-id-type="medline">18811992</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Eltorai</surname>
            <given-names>AE</given-names>
          </name>
          <name name-style="western">
            <surname>Han</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Truntzer</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Daniels</surname>
            <given-names>AH</given-names>
          </name>
        </person-group>
        <article-title>Readability of patient education materials on the American Orthopaedic Society for Sports Medicine website</article-title>
        <source>Phys Sportsmed</source>  
        <year>2014</year>  
        <month>11</month>  
        <volume>42</volume>  
        <issue>4</issue>  
        <fpage>125</fpage>  
        <lpage>130</lpage>  
        <pub-id pub-id-type="doi">10.3810/psm.2014.11.2099</pub-id>
        <pub-id pub-id-type="medline">25419896</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Morony</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Flynn</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>McCaffery</surname>
            <given-names>KJ</given-names>
          </name>
          <name name-style="western">
            <surname>Jansen</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Webster</surname>
            <given-names>AC</given-names>
          </name>
        </person-group>
        <article-title>Readability of written materials for CKD patients: A systematic review</article-title>
        <source>Am J Kidney Dis</source>  
        <year>2015</year>  
        <month>06</month>  
        <volume>65</volume>  
        <issue>6</issue>  
        <fpage>842</fpage>  
        <lpage>850</lpage>  
        <pub-id pub-id-type="doi">10.1053/j.ajkd.2014.11.025</pub-id>
        <pub-id pub-id-type="medline">25661679</pub-id>
        <pub-id pub-id-type="pii">S0272-6386(14)01535-2</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kutner</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Greenberg</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Jin</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Paulsen</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <source>The Health Literacy of America’s Adults: Results From the 2003 National Assessment of Adult Literacy</source>  
        <year>2006</year>  
        <month>09</month>  
        <access-date>2016-11-11</access-date>
        <publisher-loc>Washington, DC</publisher-loc>
        <publisher-name>US Department of Education, National Center for Education Statistics</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://nces.ed.gov/pubs2006/2006483.pdf">http://nces.ed.gov/pubs2006/2006483.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6lwUr7mOK"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pyper</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Amery</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Watson</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Crook</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Patients' experiences when accessing their online electronic patient records in primary care</article-title>
        <source>Br J Gen Pract</source>  
        <year>2004</year>  
        <month>01</month>  
        <volume>54</volume>  
        <issue>498</issue>  
        <fpage>38</fpage>  
        <lpage>43</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://bjgp.org/cgi/pmidlookup?view=long&#38;pmid=14965405"/>
        </comment>  
        <pub-id pub-id-type="medline">14965405</pub-id>
        <pub-id pub-id-type="pmcid">PMC1314776</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Keselman</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Slaughter</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Divita</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Browne</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Tsai</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>Towards consumer-friendly PHRs: Patients' experience with reviewing their health records</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2007</year>  
        <fpage>399</fpage>  
        <lpage>403</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18693866"/>
        </comment>  
        <pub-id pub-id-type="medline">18693866</pub-id>
        <pub-id pub-id-type="pmcid">PMC2655877</pub-id></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Abraham</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Jenkins</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Fallowfield</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Lay understanding of terms used in cancer consultations</article-title>
        <source>Psychooncology</source>  
        <year>2003</year>  
        <month>09</month>  
        <volume>12</volume>  
        <issue>6</issue>  
        <fpage>557</fpage>  
        <lpage>566</lpage>  
        <pub-id pub-id-type="doi">10.1002/pon.673</pub-id>
        <pub-id pub-id-type="medline">12923796</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lerner</surname>
            <given-names>EB</given-names>
          </name>
          <name name-style="western">
            <surname>Jehle</surname>
            <given-names>DV</given-names>
          </name>
          <name name-style="western">
            <surname>Janicke</surname>
            <given-names>DM</given-names>
          </name>
          <name name-style="western">
            <surname>Moscati</surname>
            <given-names>RM</given-names>
          </name>
        </person-group>
        <article-title>Medical communication: Do our patients understand?</article-title>
        <source>Am J Emerg Med</source>  
        <year>2000</year>  
        <month>11</month>  
        <volume>18</volume>  
        <issue>7</issue>  
        <fpage>764</fpage>  
        <lpage>766</lpage>  
        <pub-id pub-id-type="doi">10.1053/ajem.2000.18040</pub-id>
        <pub-id pub-id-type="medline">11103725</pub-id>
        <pub-id pub-id-type="pii">S0735-6757(00)39827-8</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jones</surname>
            <given-names>RB</given-names>
          </name>
          <name name-style="western">
            <surname>McGhee</surname>
            <given-names>SM</given-names>
          </name>
          <name name-style="western">
            <surname>McGhee</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Patient online access to medical records in general practice</article-title>
        <source>Health Bull (Edinb)</source>  
        <year>1992</year>  
        <month>03</month>  
        <volume>50</volume>  
        <issue>2</issue>  
        <fpage>143</fpage>  
        <lpage>150</lpage>  
        <pub-id pub-id-type="medline">1517087</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Baldry</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Cheal</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Fisher</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Gillett</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Huet</surname>
            <given-names>V</given-names>
          </name>
        </person-group>
        <article-title>Giving patients their own records in general practice: Experience of patients and staff</article-title>
        <source>Br Med J (Clin Res Ed)</source>  
        <year>1986</year>  
        <month>03</month>  
        <day>1</day>  
        <volume>292</volume>  
        <issue>6520</issue>  
        <fpage>596</fpage>  
        <lpage>598</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/3081187"/>
        </comment>  
        <pub-id pub-id-type="medline">3081187</pub-id>
        <pub-id pub-id-type="pmcid">PMC1339574</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sarkar</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Karter</surname>
            <given-names>AJ</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>JY</given-names>
          </name>
          <name name-style="western">
            <surname>Adler</surname>
            <given-names>NE</given-names>
          </name>
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Lopez</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Schillinger</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>The literacy divide: Health literacy and the use of an Internet-based patient portal in an integrated health system—Results from the diabetes study of northern California (DISTANCE)</article-title>
        <source>J Health Commun</source>  
        <year>2010</year>  
        <volume>15 Suppl 2</volume>  
        <fpage>183</fpage>  
        <lpage>196</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20845203"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1080/10810730.2010.499988</pub-id>
        <pub-id pub-id-type="medline">20845203</pub-id>
        <pub-id pub-id-type="pii">926943837</pub-id>
        <pub-id pub-id-type="pmcid">PMC3014858</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zarcadoolas</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Vaughon</surname>
            <given-names>WL</given-names>
          </name>
          <name name-style="western">
            <surname>Czaja</surname>
            <given-names>SJ</given-names>
          </name>
          <name name-style="western">
            <surname>Levy</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Rockoff</surname>
            <given-names>ML</given-names>
          </name>
        </person-group>
        <article-title>Consumers' perceptions of patient-accessible electronic medical records</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <volume>15</volume>  
        <issue>8</issue>  
        <fpage>e168</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2013/8/e168/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.2507</pub-id>
        <pub-id pub-id-type="medline">23978618</pub-id>
        <pub-id pub-id-type="pii">v15i8e168</pub-id>
        <pub-id pub-id-type="pmcid">PMC3758049</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Tieu</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Sarkar</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Schillinger</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Ralston</surname>
            <given-names>JD</given-names>
          </name>
          <name name-style="western">
            <surname>Ratanawongsa</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Pasick</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Lyles</surname>
            <given-names>CR</given-names>
          </name>
        </person-group>
        <article-title>Barriers and facilitators to online portal use among patients and caregivers in a safety net health care system: A qualitative study</article-title>
        <source>J Med Internet Res</source>  
        <year>2015</year>  
        <volume>17</volume>  
        <issue>12</issue>  
        <fpage>e275</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2015/12/e275/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.4847</pub-id>
        <pub-id pub-id-type="medline">26681155</pub-id>
        <pub-id pub-id-type="pii">v17i12e275</pub-id>
        <pub-id pub-id-type="pmcid">PMC4704882</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Aronson</surname>
            <given-names>AR</given-names>
          </name>
          <name name-style="western">
            <surname>Lang</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>An overview of MetaMap: Historical perspective and recent advances</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2010</year>  
        <volume>17</volume>  
        <issue>3</issue>  
        <fpage>229</fpage>  
        <lpage>236</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&#38;pmid=20442139"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/jamia.2009.002733</pub-id>
        <pub-id pub-id-type="medline">20442139</pub-id>
        <pub-id pub-id-type="pii">17/3/229</pub-id>
        <pub-id pub-id-type="pmcid">PMC2995713</pub-id></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Goryachev</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Keselman</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Rosendale</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Making texts in electronic health records comprehensible to consumers: A prototype translator</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2007</year>  
        <fpage>846</fpage>  
        <lpage>850</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18693956"/>
        </comment>  
        <pub-id pub-id-type="medline">18693956</pub-id>
        <pub-id pub-id-type="pmcid">PMC2655860</pub-id></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Abrahamsson</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Forni</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Skeppstedt</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kvist</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Medical text simplification using synonym replacement: Adapting assessment of word difficulty to a compounding language</article-title>
        <source>Proceedings of the 3rd Workshop on Predicting and Improving Text Readability for Target Reader Populations (PITR), 14th Conference of the European Chapter of the Association for Computational Linguistics</source>  
        <year>2014</year>  
        <conf-name>The 14th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2014)</conf-name>
        <conf-date>April 26-30, 2014</conf-date>
        <conf-loc>Gothenburg, Sweden</conf-loc>
        <publisher-loc>Stroudsburg, PA</publisher-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <fpage>57</fpage>  
        <lpage>65</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/W14-1207"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Methods for linking EHR notes to education materials</article-title>
        <source>AMIA Jt Summits Transl Sci Proc</source>  
        <year>2015</year>  
        <volume>2015</volume>  
        <fpage>209</fpage>  
        <lpage>215</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26306273"/>
        </comment>  
        <pub-id pub-id-type="medline">26306273</pub-id>
        <pub-id pub-id-type="pmcid">PMC4525231</pub-id></nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hasan</surname>
            <given-names>KS</given-names>
          </name>
          <name name-style="western">
            <surname>Ng</surname>
            <given-names>V</given-names>
          </name>
        </person-group>
        <article-title>Automatic keyphrase extraction: A survey of the state of the art</article-title>
        <source>Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (ACL 2014)</source>  
        <year>2014</year>  
        <conf-name>The 52nd Annual Meeting of the Association for Computational Linguistics (ACL 2014)</conf-name>
        <conf-date>June 23-25, 2014</conf-date>
        <conf-loc>Baltimore, MD</conf-loc>
        <publisher-loc>Stroudsburg, PA</publisher-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <fpage>1262</fpage>  
        <lpage>1273</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://acl2014.org/acl2014/P14-1/pdf/P14-1119.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Witten</surname>
            <given-names>IH</given-names>
          </name>
          <name name-style="western">
            <surname>Paynter</surname>
            <given-names>GW</given-names>
          </name>
          <name name-style="western">
            <surname>Frank</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Gutwin</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Nevill-Manning</surname>
            <given-names>CG</given-names>
          </name>
        </person-group>
        <article-title>KEA: Practical automatic keyphrase extraction</article-title>
        <source>Proceedings of the Fourth ACM Conference on Digital Libraries</source>  
        <year>1999</year>  
        <conf-name>The Fourth ACM Conference on Digital Libraries</conf-name>
        <conf-date>August 11-14, 1999</conf-date>
        <conf-loc>Berkeley, CA</conf-loc>
        <publisher-name>ACM</publisher-name>
        <fpage>254</fpage>  
        <lpage>255</lpage>  
        <pub-id pub-id-type="doi">10.1145/313238.313437</pub-id></nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Frank</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Paynter</surname>
            <given-names>GW</given-names>
          </name>
          <name name-style="western">
            <surname>Witten</surname>
            <given-names>IH</given-names>
          </name>
          <name name-style="western">
            <surname>Gutwin</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Nevill-Manning</surname>
            <given-names>CG</given-names>
          </name>
        </person-group>
        <article-title>Domain-specific keyphrase extraction</article-title>
        <source>Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence (IJCAI-99)</source>  
        <year>1999</year>  
        <conf-name>The Sixteenth International Joint Conference on Artificial Intelligence (IJCAI-99)</conf-name>
        <conf-date>July 31-August 6, 1999</conf-date>
        <conf-loc>Stockholm, Sweden</conf-loc>
        <fpage>668</fpage>  
        <lpage>673</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.ijcai.org/Proceedings/99-2/Papers/002.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Turney</surname>
            <given-names>PD</given-names>
          </name>
        </person-group>
        <source>Learning to Extract Keyphrases From Text</source>  
        <year>1999</year>  
        <month>02</month>  
        <day>17</day>  
        <access-date>2016-11-10</access-date>
        <publisher-loc>Ottawa, ON</publisher-loc>
        <publisher-name>National Research Council Canada, Institute for Information Technology</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://extractor.com/ERB-1057.pdf">http://extractor.com/ERB-1057.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6lvC2cX9I"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hulth</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Improved automatic keyword extraction given more linguistic knowledge</article-title>
        <source>Proceedings of the 2003 Conference on Empirical Methods in Natural Language Processing</source>  
        <year>2003</year>  
        <conf-name>The 2003 Conference on Empirical Methods in Natural Language Processing</conf-name>
        <conf-date>July 11-12, 2003</conf-date>
        <conf-loc>Sapporo, Japan</conf-loc>
        <publisher-loc>Stroudsburg, PA</publisher-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <fpage>216</fpage>  
        <lpage>223</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/W03-1028"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>HaCohen-Kerner</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Gross</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Masa</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Automatic extraction and learning of keyphrases from scientific articles</article-title>
        <source>Proceedings of the 6th International Conference on Computational Linguistics and Intelligent Text Processing (CICLing'05)</source>  
        <year>2005</year>  
        <conf-name>The 6th International Conference on Computational Linguistics and Intelligent Text Processing (CICLing'05)</conf-name>
        <conf-date>February 13-19, 2005</conf-date>
        <conf-loc>Mexico City, Mexico</conf-loc>
        <publisher-loc>Berlin, Germany</publisher-loc>
        <publisher-name>Springer-Verlag</publisher-name>
        <fpage>657</fpage>  
        <lpage>669</lpage>  
        <pub-id pub-id-type="doi">10.1007/978-3-540-30586-6_74</pub-id></nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yih</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Goodman</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Carvalho</surname>
            <given-names>VR</given-names>
          </name>
        </person-group>
        <article-title>Finding advertising keywords on Web pages</article-title>
        <source>Proceedings of the 15th International Conference on World Wide Web (WWW '06)</source>  
        <year>2006</year>  
        <conf-name>The 15th International Conference on World Wide Web (WWW '06)</conf-name>
        <conf-date>May 23-26, 2006</conf-date>
        <conf-loc>Edinburgh, Scotland</conf-loc>
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>ACM</publisher-name>
        <fpage>213</fpage>  
        <lpage>222</lpage>  
        <pub-id pub-id-type="doi">10.1145/1135777.1135813</pub-id></nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
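        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Medelyan</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Frank</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Witten</surname>
            <given-names>IH</given-names>
          </name>
        </person-group>
        <article-title>Human-competitive tagging using automatic keyphrase extraction</article-title>
        <source>Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing (EMNLP 2009)</source>
        <year>2009</year>
        <conf-name>The 2009 Conference on Empirical Methods in Natural Language Processing (EMNLP 2009)</conf-name>
        <conf-date>August 6-7, 2009</conf-date>
        <conf-loc>Singapore</conf-loc>
        <publisher-loc>Stroudsburg, PA</publisher-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <fpage>1318</fpage>
        <lpage>1327</lpage>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.cs.waikato.ac.nz/ml/publications/2009/maui_emnlp2009_1dataset.pdf"/>
        </comment> </nlm-citation>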
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lopez</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Romary</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>HUMB: Automatic key term extraction from scientific articles in GROBID</article-title>
        <source>Proceedings of the 5th International Workshop on Semantic Evaluation (ACL 2010)</source>  
        <year>2010</year>  
        <conf-name>The 5th International Workshop on Semantic Evaluation (ACL 2010)</conf-name>
        <conf-date>July 15-16, 2010</conf-date>
        <conf-loc>Uppsala, Sweden</conf-loc>
        <publisher-loc>Stroudsburg, PA</publisher-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <fpage>248</fpage>  
        <lpage>251</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/S10-1055"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
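        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>A ranking approach to keyphrase extraction</article-title>
        <source>Proceedings of the 32nd International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
        <year>2009</year>
        <conf-name>The 32nd International ACM SIGIR Conference on Research and Development in Information Retrieval</conf-name>
        <conf-date>July 19-23, 2009</conf-date>
        <conf-loc>Boston, MA</conf-loc>
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>ACM</publisher-name>
        <fpage>756</fpage>
        <lpage>757</lpage>
        <comment>See details in the Microsoft Research Technical Report MSR-TR-2009-96.
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.159.4470&#38;rep=rep1&#38;type=pdf"/>
        </comment> </nlm-citation>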
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Krapivin</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Autayeu</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Marchese</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Blanzieri</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Segata</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Improving machine learning approaches for keyphrases extraction from scientific documents with natural language knowledge</article-title>
        <source>Proceedings of the Joint JCDL/ICADL International Digital Libraries Conference (JCDL 2010)</source>  
        <year>2010</year>  
        <conf-name>The Joint JCDL/ICADL International Digital Libraries Conference (JCDL 2010)</conf-name>
        <conf-date>June 21-25, 2010</conf-date>
        <conf-loc>Gold Coast, Australia</conf-loc>
        <publisher-loc>Berlin, Germany</publisher-loc>
        <publisher-name>Springer-Verlag</publisher-name>
        <fpage>102</fpage>  
        <lpage>111</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/38f8/1d0a1eede4d7b7df169a92df22906c92a950.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>YF</given-names>
          </name>
        </person-group>
        <article-title>Identifying important concepts from medical documents</article-title>
        <source>J Biomed Inform</source>  
        <year>2006</year>  
        <month>12</month>  
        <volume>39</volume>  
        <issue>6</issue>  
        <fpage>668</fpage>  
        <lpage>679</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(06)00021-9"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2006.02.001</pub-id>
        <pub-id pub-id-type="medline">16545986</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(06)00021-9</pub-id></nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Medelyan</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Witten</surname>
            <given-names>IH</given-names>
          </name>
        </person-group>
        <article-title>Domain-independent automatic keyphrase indexing with small training sets</article-title>
        <source>J Am Soc Inf Sci Technol</source>  
        <year>2008</year>  
        <month>05</month>  
        <volume>59</volume>  
        <issue>7</issue>  
        <fpage>1026</fpage>  
        <lpage>1040</lpage>  
        <pub-id pub-id-type="doi">10.1002/asi.20790</pub-id></nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sarkar</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Automatic keyphrase extraction from medical documents</article-title>
        <source>Proceedings of the 3rd International Conference on Pattern Recognition and Machine Intelligence (PReMI '09)</source>  
        <year>2009</year>  
        <conf-name>The 3rd International Conference on Pattern Recognition and Machine Intelligence (PReMI '09)</conf-name>
        <conf-date>December 16-20, 2009</conf-date>
        <conf-loc>New Delhi, India</conf-loc>
        <fpage>273</fpage>  
        <lpage>278</lpage>  
        <pub-id pub-id-type="doi">10.1007/978-3-642-11164-8_44</pub-id></nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sarkar</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>A hybrid approach to extract keyphrases from medical documents</article-title>
        <source>Int J Comput Appl</source>  
        <year>2013</year>  
        <month>02</month>  
        <day>15</day>  
        <volume>63</volume>  
        <issue>18</issue>  
        <fpage>14</fpage>  
        <lpage>19</lpage>  
        <pub-id pub-id-type="doi">10.5120/10565-5528</pub-id></nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Herbrich</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Graepel</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Obermayer</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Large margin rank boundaries for ordinal regression</article-title>
        <source>Proceedings of Advances in Neural Information Processing Systems 1999 (NIPS 1999)</source>  
        <year>1999</year>  
        <conf-name>Advances in Neural Information Processing Systems 1999 (NIPS 1999)</conf-name>
        <conf-date>November 29-December 4, 1999</conf-date>
        <conf-loc>Denver, CO</conf-loc>
        <fpage>115</fpage>  
        <lpage>132</lpage> </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Joachims</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Training linear SVMs in linear time</article-title>
        <source>Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD'06)</source>  
        <year>2006</year>  
        <conf-name>The 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD'06)</conf-name>
        <conf-date>August 20-23, 2006</conf-date>
        <conf-loc>Philadelphia, PA</conf-loc>
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>ACM</publisher-name>
        <fpage>217</fpage>  
        <lpage>226</lpage>  
        <pub-id pub-id-type="doi">10.1145/1150402.1150429</pub-id></nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Turney</surname>
            <given-names>PD</given-names>
          </name>
        </person-group>
        <article-title>Learning algorithm for keyphrase extraction</article-title>
        <source>Inf Retr</source>  
        <year>2000</year>  
        <volume>2</volume>  
        <issue>4</issue>  
        <fpage>303</fpage>  
        <lpage>336</lpage>  
        <pub-id pub-id-type="doi">10.1023/A:1009976227802</pub-id></nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
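        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sarkar</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Nasipuri</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ghose</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>A new approach to keyphrase extraction using neural networks</article-title>
        <source>Int J Comput Sci Issues</source>
        <year>2010</year>
        <access-date>2016-11-17</access-date>
        <volume>7</volume>
        <issue>2.3</issue>
        <fpage>16</fpage>
        <lpage>25</lpage>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.ijcsi.org/papers/7-2-3-16-25.pdf">http://www.ijcsi.org/papers/7-2-3-16-25.pdf</ext-link>
        </comment> </nlm-citation>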
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Savova</surname>
            <given-names>GK</given-names>
          </name>
          <name name-style="western">
            <surname>Masanz</surname>
            <given-names>JJ</given-names>
          </name>
          <name name-style="western">
            <surname>Ogren</surname>
            <given-names>PV</given-names>
          </name>
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Sohn</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Kipper-Schuler</surname>
            <given-names>KC</given-names>
          </name>
          <name name-style="western">
            <surname>Chute</surname>
            <given-names>CG</given-names>
          </name>
        </person-group>
        <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): Architecture, component evaluation and applications</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2010</year>  
        <volume>17</volume>  
        <issue>5</issue>  
        <fpage>507</fpage>  
        <lpage>513</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&#38;pmid=20819853"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
        <pub-id pub-id-type="medline">20819853</pub-id>
        <pub-id pub-id-type="pii">17/5/507</pub-id>
        <pub-id pub-id-type="pmcid">PMC2995668</pub-id></nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mikolov</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Sutskever</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Corrado</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Dean</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Distributed representations of words and phrases and their compositionality</article-title>
        <source>Proceedings of Advances in Neural Information Processing Systems 2013 (NIPS 2013)</source>  
        <year>2013</year>  
        <conf-name>Advances in Neural Information Processing Systems 2013 (NIPS 2013)</conf-name>
        <conf-date>December 5-10, 2013</conf-date>
        <conf-loc>Lake Tahoe, NV</conf-loc>
        <fpage>3111</fpage>  
        <lpage>3119</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Tang</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Cao</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Evaluating word representation features in biomedical named entity recognition tasks</article-title>
        <source>Biomed Res Int</source>  
        <year>2014</year>  
        <volume>2014</volume>  
        <fpage>240403</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.doi.org/10.1155/2014/240403"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1155/2014/240403</pub-id>
        <pub-id pub-id-type="medline">24729964</pub-id>
        <pub-id pub-id-type="pmcid">PMC3963372</pub-id></nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Tang</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>X</given-names>
          </name>
        </person-group>
        <article-title>Effects of semantic features on machine learning-based drug name recognition systems: Word embeddings vs manually constructed dictionaries</article-title>
        <source>Information</source>  
        <year>2015</year>  
        <month>12</month>  
        <day>11</day>  
        <volume>6</volume>  
        <issue>4</issue>  
        <fpage>848</fpage>  
        <lpage>865</lpage>  
        <pub-id pub-id-type="doi">10.3390/info6040848</pub-id></nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>A general protein-protein interaction extraction architecture based on word representation and feature selection</article-title>
        <source>Int J Data Min Bioinform</source>  
        <year>2016</year>  
        <volume>14</volume>  
        <issue>3</issue>  
        <fpage>276</fpage>  
        <lpage>291</lpage>  
        <pub-id pub-id-type="doi">10.1504/IJDMB.2016.074878</pub-id></nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Song</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Liakata</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Vlachos</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Seneff</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>X</given-names>
          </name>
        </person-group>
        <article-title>Using word embedding for bio-event extraction</article-title>
        <source>Proceedings of the 2015 Workshop on Biomedical Natural Language Processing (BioNLP 2015)</source>  
        <year>2015</year>  
        <conf-name>The 2015 Workshop on Biomedical Natural Language Processing (BioNLP 2015)</conf-name>
        <conf-date>July 30, 2015</conf-date>
        <conf-loc>Beijing, China</conf-loc>
        <publisher-loc>Stroudsburg, PA</publisher-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <fpage>121</fpage>  
        <lpage>126</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/W15-3814"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nie</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Rong</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Ouyang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Xiong</surname>
            <given-names>Z</given-names>
          </name>
        </person-group>
        <article-title>Embedding assisted prediction architecture for event trigger identification</article-title>
        <source>J Bioinform Comput Biol</source>  
        <year>2015</year>  
        <month>06</month>  
        <volume>13</volume>  
        <issue>3</issue>  
        <fpage>1541001</fpage>  
        <pub-id pub-id-type="doi">10.1142/S0219720015410012</pub-id>
        <pub-id pub-id-type="medline">25669328</pub-id></nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Henriksson</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Kvist</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Dalianis</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Duneld</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Identifying adverse drug event information in clinical notes with distributional semantic representations of context</article-title>
        <source>J Biomed Inform</source>  
        <year>2015</year>  
        <month>10</month>  
        <volume>57</volume>  
        <fpage>333</fpage>  
        <lpage>349</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00180-X"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2015.08.013</pub-id>
        <pub-id pub-id-type="medline">26291578</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(15)00180-X</pub-id></nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jagannatha</surname>
            <given-names>AN</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Bidirectional RNN for medical event detection in electronic health records</article-title>
        <source>Proceedings of the 15th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>  
        <year>2016</year>  
        <conf-name>The 15th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
        <conf-date>June 12-17, 2016</conf-date>
        <conf-loc>San Diego, CA</conf-loc>
        <publisher-loc>Stroudsburg, PA</publisher-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <fpage>473</fpage>  
        <lpage>482</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/N/N16/N16-1056.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jagannatha</surname>
            <given-names>AN</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Mining and ranking biomedical synonym candidates from Wikipedia</article-title>
        <source>Proceedings of the Sixth International Workshop on Health Text Mining and Information Analysis (Louhi)</source>  
        <year>2015</year>  
        <conf-name>The Sixth International Workshop on Health Text Mining and Information Analysis (Louhi)</conf-name>
        <conf-date>September 17, 2015</conf-date>
        <conf-loc>Lisbon, Portugal</conf-loc>
        <fpage>142</fpage>  
        <lpage>151</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://aclweb.org/anthology/W/W15/W15-2619.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Clinical abbreviation disambiguation using neural word embeddings</article-title>
        <source>Proceedings of the 2015 Workshop on Biomedical Natural Language Processing (BioNLP 2015)</source>  
        <year>2015</year>  
        <conf-name>The 2015 Workshop on Biomedical Natural Language Processing (BioNLP 2015)</conf-name>
        <conf-date>July 30, 2015</conf-date>
        <conf-loc>Beijing, China</conf-loc>
        <fpage>171</fpage>  
        <lpage>176</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/W15-3822"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Ge</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Mathews</surname>
            <given-names>KS</given-names>
          </name>
          <name name-style="western">
            <surname>Ji</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>McGuinness</surname>
            <given-names>DL</given-names>
          </name>
        </person-group>
        <article-title>Exploiting task-oriented resources to learn word embeddings for clinical abbreviation expansion</article-title>
        <source>Proceedings of the 2015 Workshop on Biomedical Natural Language Processing (BioNLP 2015)</source>  
        <year>2015</year>  
        <conf-name>The 2015 Workshop on Biomedical Natural Language Processing (BioNLP 2015)</conf-name>
        <conf-date>July 30, 2015</conf-date>
        <conf-loc>Beijing, China</conf-loc>
        <fpage>92</fpage>  
        <lpage>97</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/W15-3810"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
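        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mikolov</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Corrado</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Dean</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Efficient Estimation of Word Representations in Vector Space</article-title>
        <source>arXiv:1301.3781 [cs]</source>  
        <year>2013</year>  
        <month>01</month>  
        <day>16</day>  
        <comment> 
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.researchgate.net/profile/Gs_Corrado/publication/234131319_Efficient_Estimation_of_Word_Representations_in_Vector_Space/links/5446726b0cf2f14fb80f3c7b.pdf?origin=publication_detail">https://www.researchgate.net/profile/Gs_Corrado/publication/234131319_Efficient_Estimation_of_Word_Representations_in_Vector_Space/links/5446726b0cf2f14fb80f3c7b.pdf?origin=publication_detail</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6m6NhZqFz"/>
        </comment> </nlm-citation>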
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pyysalo</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ginter</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Moen</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Salakoski</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Ananiadou</surname>
            <given-names>S</given-names>
          </name>

        </person-group>
        <article-title>Distributional semantics resources for biomedical text processing</article-title>
        <source>Proceedings of the 5th International Symposium on Languages in Biology and Medicine (LBM 2013)</source>  
        <year>2013</year>  
        <conf-name>The 5th International Symposium on Languages in Biology and Medicine (LBM 2013)</conf-name>
        <conf-date>December 12-13, 2013</conf-date>
        <conf-loc>Tokyo, Japan</conf-loc>
        <fpage>39</fpage>  
        <lpage>43</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://bio.nlplab.org/pdf/pyysalo13literature.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>QT</given-names>
          </name>
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Exploring and developing consumer health vocabularies</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2006</year>  
        <volume>13</volume>  
        <issue>1</issue>  
        <fpage>24</fpage>  
        <lpage>29</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&#38;pmid=16221948"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M1761</pub-id>
        <pub-id pub-id-type="medline">16221948</pub-id>
        <pub-id pub-id-type="pii">M1761</pub-id>
        <pub-id pub-id-type="pmcid">PMC1380193</pub-id></nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>McCray</surname>
            <given-names>AT</given-names>
          </name>
          <name name-style="western">
            <surname>Loane</surname>
            <given-names>RF</given-names>
          </name>
          <name name-style="western">
            <surname>Browne</surname>
            <given-names>AC</given-names>
          </name>
          <name name-style="western">
            <surname>Bangalore</surname>
            <given-names>AK</given-names>
          </name>
        </person-group>
        <article-title>Terminology issues in user access to Web-based medical information</article-title>
        <source>Proc AMIA Symp</source>  
        <year>1999</year>  
        <fpage>107</fpage>  
        <lpage>111</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/10566330"/>
        </comment>  
        <pub-id pub-id-type="medline">10566330</pub-id>
        <pub-id pub-id-type="pii">D005626</pub-id>
        <pub-id pub-id-type="pmcid">PMC2232498</pub-id></nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Kogan</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ash</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Greenes</surname>
            <given-names>RA</given-names>
          </name>
        </person-group>
        <article-title>Patient and clinician vocabulary: How different are they?</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2001</year>  
        <volume>84</volume>  
        <issue>Pt 1</issue>  
        <fpage>399</fpage>  
        <lpage>403</lpage>  
        <pub-id pub-id-type="medline">11604772</pub-id></nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Patrick</surname>
            <given-names>TB</given-names>
          </name>
          <name name-style="western">
            <surname>Monga</surname>
            <given-names>HK</given-names>
          </name>
          <name name-style="western">
            <surname>Sievert</surname>
            <given-names>ME</given-names>
          </name>
          <name name-style="western">
            <surname>Houston</surname>
            <given-names>HJ</given-names>
          </name>
          <name name-style="western">
            <surname>Longo</surname>
            <given-names>DR</given-names>
          </name>
        </person-group>
        <article-title>Evaluation of controlled vocabulary resources for development of a consumer entry vocabulary for diabetes</article-title>
        <source>J Med Internet Res</source>  
        <year>2001</year>  
        <volume>3</volume>  
        <issue>3</issue>  
        <fpage>e24</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2001/3/e24/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.3.3.e24</pub-id>
        <pub-id pub-id-type="medline">11720966</pub-id>
        <pub-id pub-id-type="pmcid">PMC1761907</pub-id></nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Kogan</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ash</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Greenes</surname>
            <given-names>RA</given-names>
          </name>
          <name name-style="western">
            <surname>Boxwala</surname>
            <given-names>AA</given-names>
          </name>
        </person-group>
        <article-title>Characteristics of consumer terminology for health information retrieval</article-title>
        <source>Methods Inf Med</source>  
        <year>2002</year>  
        <volume>41</volume>  
        <issue>4</issue>  
        <fpage>289</fpage>  
        <lpage>298</lpage>  
        <pub-id pub-id-type="medline">12425240</pub-id>
        <pub-id pub-id-type="pii">02040289</pub-id></nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Soergel</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Exploring medical expressions used by consumers and the media: An emerging view of consumer health vocabularies</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2003</year>  
        <fpage>674</fpage>  
        <lpage>678</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14728258"/>
        </comment>  
        <pub-id pub-id-type="medline">14728258</pub-id>
        <pub-id pub-id-type="pii">D030002918</pub-id>
        <pub-id pub-id-type="pmcid">PMC1479921</pub-id></nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>QT</given-names>
          </name>
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Crowell</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Divita</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Roth</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Browne</surname>
            <given-names>AC</given-names>
          </name>
        </person-group>
        <article-title>Identifying consumer-friendly display (CFD) names for health concepts</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2005</year>  
        <fpage>859</fpage>  
        <lpage>863</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/16779162"/>
        </comment>  
        <pub-id pub-id-type="medline">16779162</pub-id>
        <pub-id pub-id-type="pii">58480</pub-id>
        <pub-id pub-id-type="pmcid">PMC1560732</pub-id></nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Keselman</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Divita</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Browne</surname>
            <given-names>AC</given-names>
          </name>
          <name name-style="western">
            <surname>Leroy</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Zeng-Treitler</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>Consumer health concepts that do not map to the UMLS: Where do they fit?</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2008</year>  
        <volume>15</volume>  
        <issue>4</issue>  
        <fpage>496</fpage>  
        <lpage>505</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&#38;pmid=18436906"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M2599</pub-id>
        <pub-id pub-id-type="medline">18436906</pub-id>
        <pub-id pub-id-type="pii">M2599</pub-id>
        <pub-id pub-id-type="pmcid">PMC2442253</pub-id></nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Crowell</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Tse</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>A text corpora-based estimation of the familiarity of health terminology</article-title>
        <source>Proceedings of the 6th International Symposium on Biological and Medical Data Analysis (ISBMDA 2005)</source>  
        <year>2005</year>  
        <conf-name>6th International Symposium on Biological and Medical Data Analysis (ISBMDA 2005)</conf-name>
        <conf-date>November 10-11, 2005</conf-date>
        <conf-loc>Aveiro, Portugal</conf-loc>
        <fpage>184</fpage>  
        <lpage>192</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://lhncbc.nlm.nih.gov/files/archive/pub2005041.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>McCallum</surname>
            <given-names>AK</given-names>
          </name>
        </person-group>
        <source>MALLET: A Machine Learning for Language Toolkit</source>  
        <year>2002</year>  
        <access-date>2016-07-04</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://mallet.cs.umass.edu">http://mallet.cs.umass.edu</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6il7RNCwf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Breiman</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Random forests</article-title>
        <source>Mach Learn</source>  
        <year>2001</year>  
        <volume>45</volume>  
        <issue>1</issue>  
        <fpage>5</fpage>  
        <lpage>32</lpage>  
        <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Breiman</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Bagging predictors</article-title>
        <source>Mach Learn</source>  
        <year>1996</year>  
        <volume>24</volume>  
        <issue>2</issue>  
        <fpage>123</fpage>  
        <lpage>140</lpage>  
        <pub-id pub-id-type="doi">10.1023/A:1018054314350</pub-id></nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ho</surname>
            <given-names>TK</given-names>
          </name>
        </person-group>
        <article-title>Random decision forests</article-title>
        <source>Proceedings of the Third International Conference on Document Analysis and Recognition (ICDAR'95)</source>  
        <year>1995</year>  
        <conf-name>The Third International Conference on Document Analysis and Recognition (ICDAR'95)</conf-name>
        <conf-date>August 14-16, 1995</conf-date>
        <conf-loc>Montreal, QC</conf-loc>
        <fpage>278</fpage>  
        <lpage>282</lpage> </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Amit</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Geman</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Shape quantization and recognition with randomized trees</article-title>
        <source>Neural Comput</source>  
        <year>1997</year>  
        <month>10</month>  
        <volume>9</volume>  
        <issue>7</issue>  
        <fpage>1545</fpage>  
        <lpage>1588</lpage>  
        <pub-id pub-id-type="doi">10.1162/neco.1997.9.7.1545</pub-id></nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ho</surname>
            <given-names>TK</given-names>
          </name>
        </person-group>
        <article-title>The random subspace method for constructing decision forests</article-title>
        <source>IEEE Trans Pattern Anal Mach Intell</source>  
        <year>1998</year>  
        <volume>20</volume>  
        <issue>8</issue>  
        <fpage>832</fpage>  
        <lpage>844</lpage>  
        <pub-id pub-id-type="doi">10.1109/34.709601</pub-id></nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
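        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pedregosa</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Varoquaux</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Gramfort</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Michel</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Thirion</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Grisel</surname>
            <given-names>O</given-names>
          </name>
          <etal/>
        </person-group>
        <article-title>Scikit-learn: machine learning in Python</article-title>
        <source>J Mach Learn Res</source>  
        <year>2011</year>  
        <volume>12</volume>  
        <fpage>2825</fpage>  
        <lpage>2830</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf">http://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf</ext-link>
        </comment> </nlm-citation>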
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zesch</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Gurevych</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <article-title>Approximate matching for evaluating keyphrase extraction</article-title>
        <source>Proceedings of the 2009 International Conference on Recent Advances in Natural Language Processing (RANLP 2009)</source>  
        <year>2009</year>  
        <conf-name>The 2009 International Conference on Recent Advances in Natural Language Processing (RANLP 2009)</conf-name>
        <conf-date>September 14-16, 2009</conf-date>
        <conf-loc>Borovets, Bulgaria</conf-loc>
        <fpage>484</fpage>  
        <lpage>489</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/R09-1086"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Pennell</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Unsupervised approaches for automatic keyword extraction using meeting transcripts</article-title>
        <source>Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the ACL</source>  
        <year>2009</year>  
        <conf-name>Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the ACL</conf-name>
        <conf-date>May 31-June 5, 2009</conf-date>
        <conf-loc>Boulder, CO</conf-loc>
        <publisher-loc>Stroudsburg, PA</publisher-loc>
        <publisher-name>Association for Computational Linguistics</publisher-name>
        <fpage>620</fpage>  
        <lpage>628</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.anthology.aclweb.org/N/N09/N09-1070.pdf"/>
        </comment> </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
