<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v6i1e8</article-id>
    <article-id pub-id-type="pmid">29391345</article-id>
    <article-id pub-id-type="doi">10.2196/medinform.8662</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Automated Information Extraction on Treatment and Prognosis for Non–Small Cell Lung Cancer Radiotherapy Patients: Clinical Study</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Tao</surname>
          <given-names>Cui</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Zarogoulidis</surname>
          <given-names>Paul</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1">
        <name name-style="western">
          <surname>Zheng</surname>
          <given-names>Shuai</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8090-9278</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Jabbour</surname>
          <given-names>Salma K</given-names>
        </name>
        <degrees>MD</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-8200-5371</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>O'Reilly</surname>
          <given-names>Shannon E</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7766-8979</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Lu</surname>
          <given-names>James J</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7888-7412</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Dong</surname>
          <given-names>Lihua</given-names>
        </name>
        <degrees>MD</degrees>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9211-8369</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib6">
        <name name-style="western">
          <surname>Ding</surname>
          <given-names>Lijuan</given-names>
        </name>
        <degrees>MD</degrees>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9156-9328</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib7">
        <name name-style="western">
          <surname>Xiao</surname>
          <given-names>Ying</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-8558-6394</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib8">
        <name name-style="western">
          <surname>Yue</surname>
          <given-names>Ning</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7885-7368</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib9" equal-contrib="yes">
        <name name-style="western">
          <surname>Wang</surname>
          <given-names>Fusheng</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff6" ref-type="aff">6</xref>
        <xref rid="aff7" ref-type="aff">7</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9369-9361</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib10" corresp="yes" equal-contrib="yes">
      <name name-style="western">
        <surname>Zou</surname>
        <given-names>Wei</given-names>
      </name>
      <degrees>PhD</degrees>
      <xref rid="aff3" ref-type="aff">3</xref>
      <address>
        <institution>Penn Medicine</institution>
        <institution>Department of Radiation Oncology</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>3400 Civic Center Blvd</addr-line>
        <addr-line>Philadelphia, PA, 19104</addr-line>
        <country>United States</country>
        <phone>1 215 866 7087</phone>
        <email>wei.zou@uphs.upenn.edu</email>
      </address>  
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-5307-4685</ext-link></contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>Department of Biomedical Informatics</institution>
    <institution>Emory University</institution>  
    <addr-line>Atlanta, GA</addr-line>
    <country>United States</country></aff>
    <aff id="aff2">
    <sup>2</sup>
    <institution>Department of Radiation Oncology</institution>
    <institution>Rutgers Cancer Institute of New Jersey</institution>  
    <addr-line>New Brunswick, NJ</addr-line>
    <country>United States</country></aff>
    <aff id="aff3">
    <sup>3</sup>
    <institution>Penn Medicine</institution>
    <institution>Department of Radiation Oncology</institution>  
    <institution>University of Pennsylvania</institution>  
    <addr-line>Philadelphia, PA</addr-line>
    <country>United States</country></aff>
    <aff id="aff4">
    <sup>4</sup>
    <institution>Department of Mathematics and Computer Science</institution>
    <institution>Emory University</institution>  
    <addr-line>Atlanta, GA</addr-line>
    <country>United States</country></aff>
    <aff id="aff5">
    <sup>5</sup>
    <institution>Department of Radiation Oncology</institution>
    <institution>The First Hospital</institution>  
    <addr-line>Changchun</addr-line>
    <country>China</country></aff>
    <aff id="aff6">
    <sup>6</sup>
    <institution>Department of Biomedical Informatics</institution>
    <institution>Stony Brook University</institution>  
    <addr-line>Stony Brook, NY</addr-line>
    <country>United States</country></aff>
    <aff id="aff7">
    <sup>7</sup>
    <institution>Department of Computer Science</institution>
    <institution>Stony Brook University</institution>  
    <addr-line>Stony Brook, NY</addr-line>
    <country>United States</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Wei Zou 
      <email>wei.zou@uphs.upenn.edu</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><season>Jan-Mar</season><year>2018</year></pub-date>
    <pub-date pub-type="epub">
      <day>01</day>
      <month>02</month>
      <year>2018</year>
    </pub-date>
    <volume>6</volume>
    <issue>1</issue>
    <elocation-id>e8</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>7</day>
        <month>8</month>
        <year>2017</year>
      </date>
      <date date-type="rev-request">
        <day>21</day>
        <month>9</month>
        <year>2017</year>
      </date>
      <date date-type="rev-recd">
        <day>15</day>
        <month>11</month>
        <year>2017</year>
      </date>
      <date date-type="accepted">
        <day>1</day>
        <month>12</month>
        <year>2017</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Shuai Zheng, Salma K Jabbour, Shannon E O'Reilly, James J Lu, Lihua Dong, Lijuan Ding, Ying Xiao, Ning Yue, Fusheng Wang, Wei Zou. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 01.02.2018.</copyright-statement>
    <copyright-year>2018</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://medinform.jmir.org/2018/1/e8/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>In outcome studies of oncology patients undergoing radiation, researchers extract valuable information from medical records generated before, during, and after radiotherapy visits, such as survival data, toxicities, and complications. Clinical studies rely heavily on these data to correlate the treatment regimen with the prognosis to develop evidence-based radiation therapy paradigms. These data are available mainly in forms of narrative texts or table formats with heterogeneous vocabularies. Manual extraction of the related information from these data can be time consuming and labor intensive, which is not ideal for large studies.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>The objective of this study was to adapt the interactive information extraction platform Information and Data Extraction using Adaptive Learning (IDEAL-X) to extract treatment and prognosis data for patients with locally advanced or inoperable non–small cell lung cancer (NSCLC).</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We transformed patient treatment and prognosis documents into normalized structured forms using the IDEAL-X system for easy data navigation. The adaptive learning and user-customized controlled toxicity vocabularies were applied to extract categorized treatment and prognosis data, so as to generate structured output.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>In total, we extracted data from 261 treatment and prognosis documents relating to 50 patients, with overall precision and recall of more than 93% and 83%, respectively. For toxicity information extraction, which is important for studying patient posttreatment side effects and quality of life, precision and recall reached 95.7% and 94.5%, respectively.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>The IDEAL-X system is capable of extracting study data regarding NSCLC chemoradiation patients with significant accuracy and effectiveness, and therefore can be used in large-scale radiotherapy clinical data studies.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>information extraction</kwd>
      <kwd>oncology</kwd>
      <kwd>chemoradiation treatment</kwd>
      <kwd>prognosis</kwd>
      <kwd>non–small cell lung</kwd>
      <kwd>information storage and retrieval</kwd>
      <kwd>natural language processing</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Locally advanced or inoperable non–small cell lung cancer (NSCLC) occurs in approximately 20% to 30% of all cases of NSCLC [<xref ref-type="bibr" rid="ref1">1</xref>] and may be treated with a combination of definitive concurrent chemotherapy and radiation. Modern radiotherapy has made great advances in the care of NSCLC patients, by reducing potential toxicities using involved field irradiation, while improving survival rates [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. Assessing the effects of new developments in treatment techniques and regimens requires studies on the correlation between the treatment and prognosis [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. Such studies involve extracting extensive patient information on chemoradiation treatments and follow-up assessments, including survival, tumor control, and toxicities.</p>
      <p>Information about treatment and prognosis is embedded in treatment summaries and clinical encounter notes, which have various formats and diverse vocabularies. Manual extraction from large volumes of patient treatment summaries and records describing prognosis is time consuming and labor intensive. There is a need for an automated information system, as a natural language processing tool, to extract the needed patient treatment and prognosis data. During recent years, automated information systems have become widely used in medical and biomedical domains. The clinical Text Analysis and Knowledge Extraction System specializes in clinical information extraction [<xref ref-type="bibr" rid="ref8">8</xref>]. The Cancer Tissue Information Extraction System focuses on annotating cancer text [<xref ref-type="bibr" rid="ref9">9</xref>]. MedLEE supports connecting value to controlled vocabularies [<xref ref-type="bibr" rid="ref10">10</xref>]. MedEx aims to extract medication-related information such as dosage and duration [<xref ref-type="bibr" rid="ref11">11</xref>]. The Clinical Language Annotation, Modeling, and Processing toolkit integrates award-winning algorithms and, moreover, enables users to customize natural language processing components so as to encode clinical text automatically [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Medical text extraction processes pathology reports and uses rule-based methods to classify lung cancer stages [<xref ref-type="bibr" rid="ref14">14</xref>]. A recent study also demonstrated that the metastatic site and status of lung cancer could be extracted from pathology reports using a pipeline [<xref ref-type="bibr" rid="ref15">15</xref>]. Another study showed that cancer stage information could also be extracted with natural language processing [<xref ref-type="bibr" rid="ref16">16</xref>]. Most traditional information extraction systems rely on batch training or predefined rules and were designed for only limited medical domains or tasks.</p>
      <p>To support a retrospective study of NSCLC chemoradiotherapy patients, we adapted our in-house–developed information extraction platform, Information and Data Extraction using Adaptive Learning (IDEAL-X; X represents controlled vocabulary) system [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. This information extraction system aims to transform free-text clinical documents into structured data and has been used by projects in cardiology and pathology. IDEAL-X possesses unique features different from the systems mentioned above: (1) users may freely customize attributes to be extracted; (2) the system extracts information from narrative medical documents and generates normalized values to populate output tables and assist manual annotation; (3) it requires no mandatory configurations or training before performing annotation and adaptive learning processes; and (4) the system learns from users’ normal interactions transparently, and establishes and refines decision models incrementally, which further alleviates manual annotation efforts. <xref ref-type="fig" rid="figure1">Figure 1</xref> shows how the IDEAL-X system processes the input from free-text reports generated during physician and patient encounters and delivers structured output.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Screenshot of the Information and Data Extraction using Adaptive Learning (IDEAL-X) platform, and example input and output.</p>
        </caption>
        <graphic xlink:href="medinform_v6i1e8_fig1.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Patient Information</title>
        <p>We collected NSCLC patient data to investigate the relationship between shrinkage of the treated tumor and each category of prognosis data: survival, tumor control, and toxicities. The patient treatment data we needed to identify included the chemoradiotherapy drugs used, dose, and treatment time frame. From the follow-up clinical notes, we needed to extract tumor control information diagnosed from the patient’s follow-up computed tomography and positron emission tomography images, patient toxicities, and complication data, including skin, internal organ, blood, and overall body reactions to treatment. We further categorized toxicities into different toxicity grades [<xref ref-type="bibr" rid="ref20">20</xref>]. After we extracted the information in a structured format, we intended to use it to statistically correlate treatment tumor shrinkage with survival time, disease control rate, and the toxicities.</p>
        <p>From studies approved by the institutional review boards of both Rutgers University and Emory University, we retrospectively identified 50 patients who had primary unresectable, locally advanced, biopsy-proven stage II-III NSCLC, and who had received chemoradiotherapy with a median follow-up of 22 months. In total, we exported 261 treatment and patient follow-up documents from the patient electronic health record system ARIA (Varian Medical Systems, Inc, Palo Alto, CA, USA) and anonymized the data for this study.</p>
      </sec>
      <sec>
        <title>IDEAL-X System Development</title>
        <p>We adapted the IDEAL-X system to support automated information extraction from the NSCLC chemoradiation patients’ documents. After a requirement analysis, we added new features, such as extracting timex and parsing tabular information, to enhance the original system. We also implemented corresponding feature extraction and machine learning processes for timex and tabular formats, and constructed the dictionary to assist toxicity data extraction. We extracted patient information, such as treatment time frame and chemoradiotherapy, from treatment records with an adaptive learning process (<xref ref-type="table" rid="table1">Table 1</xref>). In extracting this information, the system began without any prior training and created its machine learning model incrementally. During the information extraction of the toxicities, the adaptive learning process was disabled. We used the dictionary shown in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> to aid in toxicity information extraction. Along with extracted values, the sentences where the values were embedded were also output in a spreadsheet, which could be used for further manual toxicity grade differentiation based on patient Common Terminology Criteria for Adverse Events guidelines v 4.0, which were designated previously in the patient charts [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <p>In addition, to verify the extracted data, we asked 2 physicians to manually annotate these reports. We used the manually annotated ground truth to validate the automatically generated output from the IDEAL-X system. We used precision and recall results to estimate the effectiveness of extraction.</p>
      </sec>
      <sec>
        <title>IDEAL-X Adaptive Learning Process</title>
        <p>Through adaptive learning, IDEAL-X established its decision model through ordinary operations in manual annotation. First, the user designated the value to fill every attribute in the structured output form. After a few initial documents, the system quickly learned important and related information that the user sought and began to generate standardized values automatically in subsequent documents. The system continued to learn and update its knowledge, without special user intervention. This incremental learning process made the system domain agnostic and not limited to a specific medical report. When available, a user-defined controlled dictionary and other configurations could also be provided by the user to facilitate this learning process, but they were not mandatory.</p>
      </sec>
      <sec>
        <title>System Data Flow</title>
        <p><xref ref-type="fig" rid="figure2">Figure 2</xref> demonstrates the system’s data flow. Each time that the system loaded a document, the system moved through the preprocessing phase and parsed the text to analyze and identify important linguistic features and natural language elements. These features and elements included (1) part of speech: the part-of-speech tag of each word, for example, noun and verb; (2) timex: the system relied on predefined regular expressions to identify timex, such as 2010-01-09 and Sep 13, 2013, and then indexed them based on their position in the text; (3) tabular information: the system identified and parsed tables in input text to comprehend underlying relations between values and the metadata in a table; (4) negation terms: the system detected negation terms and regions being affected, for example, in the case of “patient denies fever and fatigue,” “fever” and “fatigue” were not extracted as part of the toxicities; and (5) uncertain terms: the system identified uncertain phrases and regions being governed, for example, “We explained to her that the risks of the treatment included dysphagia and pneumonitis” meant that dysphagia and pneumonitis had not appeared yet as symptoms. We used these features to mark the input text and provide detailed linguistic indications during extraction.</p>
        <p>After preprocessing, the parsed text was investigated by the automated annotation component of the system to populate the output form automatically. First, sentences where possible values may be located were extracted based on text hierarchy, frequently co-occurring terms, previously extracted values, or user-customized vocabularies. The system then identified candidate phrases from located sentences using either a hidden Markov model [<xref ref-type="bibr" rid="ref21">21</xref>] chunker or a dictionary chunker. Subsequently, candidate values were examined by various filters based on linguistic features such as part of speech, certainty, or negation collected during preprocessing. After filtering, the sentence score and the chunk score were combined, on the basis of which a classifier determined the overall confidence score of each candidate value and categorized it as “accept” or “reject.”</p>
        <p>We then reviewed the automatically extracted values manually for the purpose of adaptive learning. We considered positive and negative scenarios: if the user navigated to the next document without changing any values, we regarded the values generated by the system as positive training cases; if the user modified any values, we regarded the system-generated values as negative training cases and the manually updated values as positive ones. We used the results of the review to support further improvements in the automated annotation component. Different feature extraction procedures, which model the traits of numerical, nominal, timex, and tabular data elements, were applied to corresponding positive and negative instances. By repeating these steps, the system became intelligent incrementally and delivered more accurate results.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Information extracted from treatment records of patients with non–small cell lung cancer.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="280"/>
            <col width="190"/>
            <col width="210"/>
            <col width="180"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Attributes</td>
                <td>Text data type</td>
                <td>Numbers of values</td>
                <td>Dictionary</td>
                <td>Adaptive learning</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Treatment site</td>
                <td>Nominal</td>
                <td>68</td>
                <td>N/A<sup>a</sup></td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>Chemotherapy information</td>
                <td>Nominal</td>
                <td>56</td>
                <td>N/A</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>Treatment time frame</td>
                <td>Date</td>
                <td>92</td>
                <td>N/A</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>Radiation therapy dose</td>
                <td>Numerical</td>
                <td>97</td>
                <td>N/A</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>Toxicities</td>
                <td>Nominal</td>
                <td>331</td>
                <td>Yes</td>
                <td>N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <boxed-text id="box1" position="float">
          <title>Dictionary of toxicities.</title>
          <p>Anemia</p>
          <p>Lymphopenia</p>
          <p>Anorexia</p>
          <p>Dehydration</p>
          <p>Dyspnea</p>
          <p>Fatigue</p>
          <p>Mucosal inflammation</p>
          <p>Radiation esophagitis</p>
          <p>Weight decrease</p>
          <p>Cough</p>
          <p>Febrile neutropenia</p>
          <p>Neutropenia</p>
          <p>Bronchitis</p>
          <p>Diarrhea</p>
          <p>Esophagitis</p>
          <p>Hyponatremia</p>
          <p>Nausea</p>
          <p>Radiation pneumonitis</p>
          <p>Dermatitis</p>
          <p>Leukopenia</p>
          <p>Thrombocytopenia</p>
          <p>Decreased appetite</p>
          <p>Dysphagia</p>
          <p>Failure to thrive</p>
          <p>Localized infection</p>
          <p>Pneumonia</p>
          <p>Vomiting</p>
          <p>Insomnia</p>
        </boxed-text>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Data flow in the Information and Data Extraction using Adaptive Learning (IDEAL-X) platform. EMR: electronic medical record.</p>
          </caption>
          <graphic xlink:href="medinform_v6i1e8_fig2.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="fig" rid="figure3">Figure 3</xref> shows the validation results against the manually annotated ground truth. In the validation for patient characteristics and tumor control, the system achieved an overall precision of over 93%. The recall values of all information were more than 83%. The recalls were lower than the precisions, as the recalls reflected the performance during the overall adaptive learning process—the system processed a few documents to construct and refine its decision model at its early stage in the adaptive learning process.</p>
      <p>Especially in the extraction of the toxicities, the negation detection and certainty detection filters contributed directly to the accuracy of extraction. With the help of a controlled dictionary, the system achieved an overall precision of 95.7% and recall of 94.5%.</p>
      <p>Within 1 second, a well-trained system can process patient documents of multiple pages and output the results in a predefined format. Compared with manual review, which requires reading through the entire document and manually annotating the notes on each patient, this system significantly improved the efficiency of information extraction.</p>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>Effectiveness of data extraction as estimated by precision and recall of automatically generated output compared with manually annotated ground truth.</p>
        </caption>
        <graphic xlink:href="medinform_v6i1e8_fig3.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>IDEAL-X employed adaptive learning and a controlled vocabulary to support information extraction, which alleviated both the training and the deployment processes that could be expensive in applying a traditional information extraction system. The various data types IDEAL-X supports cover the most important and common information in oncology reports, which delivers great usability to our use case. We have demonstrated the advantage of this system in greatly improving information extraction effectiveness while maintaining high accuracy when applied to extracting NSCLC patient treatment and prognosis data from heterogeneous document formats. In addition, because the system improves its performance incrementally, its accuracy could be further improved with additional training documents. Once trained, the developed system was able to process further fed-in reports in batch mode without revision. Without an intervening regular manual reporting process that handles input documents in sequence, the system accumulates knowledge transparently to empower the task and, therefore, could be conveniently integrated into a regular clinical workflow. The technology it used was domain agnostic and, therefore, could be transferred to other disease sites and studies in radiation oncology.</p>
      <sec>
        <title>Limitations</title>
        <p>In the validation analysis, the system also revealed some unavoidable limitations. The system identified and comprehended information based on explicitly expressed keywords. For example, the phrases “neoadjuvant chemo” and “upfront chemotherapy” may be used as keywords to identify chemotherapy induction. However, in situations where relevant information is distributed across different regions in the text, more insightful comprehension becomes necessary. For example, in the case of “After 4 cycles of chemotherapy and abdomen...we began radiation...,” the system was not intelligent enough to interpret the meaning of “4 cycles” as “neoadjuvant chemotherapy” behind the narrations. In general, this sophisticated scenario reveals the limitation of this information extraction-based approach. The system requires explicit keywords or hints to determine an event; however, it cannot reason and analyze factors collected from different sources. Such cases resulted in lower recalls for chemotherapy than for other attributes and demanded a manual review. Therefore, to facilitate the manual review, we output the associated sentence with the extracted information together in tabular format for user manual review and validation at a later time.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>We adapted the IDEAL-X system to automatically extract treatment and prognostic information for stage II and III NSCLC patients who had received chemoradiation. With this system, patient information was extracted efficiently from the patients’ medical documents in various formats. The system, together with minimized manual review efforts, generated outputs with high precision and recall. It significantly improved the effectiveness of information extraction and can be easily applied to other radiation oncology patient studies at larger scales.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>

        <def-item>
          <term id="abb1">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>

        <def-item>
          <term id="abb2">IDEAL-X</term>
          <def>
            <p>Information and Data Extraction using Adaptive Learning</p>
          </def>
        </def-item>
       
        <def-item>
          <term id="abb3">NSCLC</term>
          <def>
            <p>non–small cell lung cancer</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ramalingam</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Belani</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Systemic chemotherapy for advanced non-small cell lung cancer: recent advances and future directions</article-title>
        <source>Oncologist</source>  
        <year>2008</year>  
        <volume>13 Suppl 1</volume>  
        <fpage>5</fpage>  
        <lpage>13</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://theoncologist.alphamedpress.org/cgi/pmidlookup?view=long&#38;pmid=18263769"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1634/theoncologist.13-S1-5</pub-id>
        <pub-id pub-id-type="medline">18263769</pub-id>
        <pub-id pub-id-type="pii">13/suppl_1/5</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Furuse</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Fukuoka</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kawahara</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Nishikawa</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Takada</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Kudoh</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Katagami</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Ariyoshi</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Phase III study of concurrent versus sequential thoracic radiotherapy in combination with mitomycin, vindesine, and cisplatin in unresectable stage III non-small-cell lung cancer</article-title>
        <source>J Clin Oncol</source>  
        <year>1999</year>  
        <month>09</month>  
        <volume>17</volume>  
        <issue>9</issue>  
        <fpage>2692</fpage>  
        <lpage>9</lpage>  
        <pub-id pub-id-type="doi">10.1200/JCO.1999.17.9.2692</pub-id>
        <pub-id pub-id-type="medline">10561343</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Belani</surname>
            <given-names>CP</given-names>
          </name>
          <name name-style="western">
            <surname>Choy</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Bonomi</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Scott</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Travis</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Haluschak</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Curran</surname>
            <given-names>WJ</given-names>
          </name>
        </person-group>
        <article-title>Combined chemoradiotherapy regimens of paclitaxel and carboplatin for locally advanced non-small-cell lung cancer: a randomized phase II locally advanced multi-modality protocol</article-title>
        <source>J Clin Oncol</source>  
        <year>2005</year>  
        <month>09</month>  
        <day>01</day>  
        <volume>23</volume>  
        <issue>25</issue>  
        <fpage>5883</fpage>  
        <lpage>91</lpage>  
        <pub-id pub-id-type="doi">10.1200/JCO.2005.55.405</pub-id>
        <pub-id pub-id-type="medline">16087941</pub-id>
        <pub-id pub-id-type="pii">JCO.2005.55.405</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liao</surname>
            <given-names>ZX</given-names>
          </name>
          <name name-style="western">
            <surname>Komaki</surname>
            <given-names>RR</given-names>
          </name>
          <name name-style="western">
            <surname>Thames</surname>
            <given-names>HD</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>HH</given-names>
          </name>
          <name name-style="western">
            <surname>Tucker</surname>
            <given-names>SL</given-names>
          </name>
          <name name-style="western">
            <surname>Mohan</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Martel</surname>
            <given-names>MK</given-names>
          </name>
          <name name-style="western">
            <surname>Wei</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>ES</given-names>
          </name>
          <name name-style="western">
            <surname>Blumenschein</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Hong</surname>
            <given-names>WK</given-names>
          </name>
          <name name-style="western">
            <surname>Cox</surname>
            <given-names>JD</given-names>
          </name>
        </person-group>
        <article-title>Influence of technologic advances on outcomes in patients with unresectable, locally advanced non-small-cell lung cancer receiving concomitant chemoradiotherapy</article-title>
        <source>Int J Radiat Oncol Biol Phys</source>  
        <year>2010</year>  
        <month>03</month>  
        <day>01</day>  
        <volume>76</volume>  
        <issue>3</issue>  
        <fpage>775</fpage>  
        <lpage>81</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.ijrobp.2009.02.032</pub-id>
        <pub-id pub-id-type="medline">19515503</pub-id>
        <pub-id pub-id-type="pii">S0360-3016(09)00322-8</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bral</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>De Ridder</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Duchateau</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Gevaert</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Engels</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Schallier</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Storme</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Daily megavoltage computed tomography in lung cancer radiotherapy: correlation between volumetric changes and local outcome</article-title>
        <source>Int J Radiat Oncol Biol Phys</source>  
        <year>2011</year>  
        <month>08</month>  
        <day>01</day>  
        <volume>80</volume>  
        <issue>5</issue>  
        <fpage>1338</fpage>  
        <lpage>42</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.ijrobp.2010.04.002</pub-id>
        <pub-id pub-id-type="medline">20638192</pub-id>
        <pub-id pub-id-type="pii">S0360-3016(10)00505-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Aupérin</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Le Pechoux</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Rolland</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Curran</surname>
            <given-names>WJ</given-names>
          </name>
          <name name-style="western">
            <surname>Furuse</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Fournel</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Belderbos</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Clamon</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Ulutin</surname>
            <given-names>HC</given-names>
          </name>
          <name name-style="western">
            <surname>Paulus</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Yamanaka</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Bozonnat</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Uitterhoeve</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Stewart</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Arriagada</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Burdett</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Pignon</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Meta-analysis of concomitant versus sequential radiochemotherapy in locally advanced non-small-cell lung cancer</article-title>
        <source>J Clin Oncol</source>  
        <year>2010</year>  
        <month>05</month>  
        <day>01</day>  
        <volume>28</volume>  
        <issue>13</issue>  
        <fpage>2181</fpage>  
        <lpage>90</lpage>  
        <pub-id pub-id-type="doi">10.1200/JCO.2009.26.2543</pub-id>
        <pub-id pub-id-type="medline">20351327</pub-id>
        <pub-id pub-id-type="pii">JCO.2009.26.2543</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jabbour</surname>
            <given-names>SK</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Haider</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Surakanti</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Aisner</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Langenfeld</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Yue</surname>
            <given-names>NJ</given-names>
          </name>
          <name name-style="western">
            <surname>Haffty</surname>
            <given-names>BG</given-names>
          </name>
          <name name-style="western">
            <surname>Zou</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <article-title>Reduction in tumor volume by cone-beam computed tomography predicts overall survival in non-small cell lung cancer treated with chemoradiation therapy</article-title>
        <source>Int J Radiat Oncol Biol Phys</source>  
        <year>2015</year>  
        <month>07</month>  
        <day>01</day>  
        <volume>92</volume>  
        <issue>3</issue>  
        <fpage>627</fpage>  
        <lpage>33</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26068495"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.ijrobp.2015.02.017</pub-id>
        <pub-id pub-id-type="medline">26068495</pub-id>
        <pub-id pub-id-type="pii">S0360-3016(15)00184-4</pub-id>
        <pub-id pub-id-type="pmcid">PMC5767471</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Savova</surname>
            <given-names>GK</given-names>
          </name>
          <name name-style="western">
            <surname>Masanz</surname>
            <given-names>JJ</given-names>
          </name>
          <name name-style="western">
            <surname>Ogren</surname>
            <given-names>PV</given-names>
          </name>
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Sohn</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Kipper-Schuler</surname>
            <given-names>KC</given-names>
          </name>
          <name name-style="western">
            <surname>Chute</surname>
            <given-names>CG</given-names>
          </name>
        </person-group>
        <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2010</year>  
        <volume>17</volume>  
        <issue>5</issue>  
        <fpage>507</fpage>  
        <lpage>13</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&#38;pmid=20819853"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
        <pub-id pub-id-type="medline">20819853</pub-id>
        <pub-id pub-id-type="pii">17/5/507</pub-id>
        <pub-id pub-id-type="pmcid">PMC2995668</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Crowley</surname>
            <given-names>RS</given-names>
          </name>
          <name name-style="western">
            <surname>Castine</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Mitchell</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Chavan</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>McSherry</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Feldman</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>caTIES: a grid based system for coding and retrieval of surgical pathology reports and tissue specimens in support of translational research</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2010</year>  
        <volume>17</volume>  
        <issue>3</issue>  
        <fpage>253</fpage>  
        <lpage>64</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20442142"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/jamia.2009.002295</pub-id>
        <pub-id pub-id-type="medline">20442142</pub-id>
        <pub-id pub-id-type="pii">17/3/253</pub-id>
        <pub-id pub-id-type="pmcid">PMC2995710</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>FierceBiotech</collab>
        </person-group>
        <source>Columbia grants Health Fidelity exclusive license to MedLEE NLP</source>  
        <year>2012</year>  
        <month>01</month>  
        <day>11</day>  
        <access-date>2017-08-07</access-date>
        <publisher-loc>Newton, MA</publisher-loc>
        <publisher-name>Questex</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.fiercebiotech.com/biotech/columbia-grants-health-fidelity-exclusive-license-to-medlee-nlp">http://www.fiercebiotech.com/biotech/columbia-grants-health-fidelity-exclusive-license-to-medlee-nlp</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6sXdtEJnH"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Stenner</surname>
            <given-names>SP</given-names>
          </name>
          <name name-style="western">
            <surname>Doan</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Johnson</surname>
            <given-names>KB</given-names>
          </name>
          <name name-style="western">
            <surname>Waitman</surname>
            <given-names>LR</given-names>
          </name>
          <name name-style="western">
            <surname>Denny</surname>
            <given-names>JC</given-names>
          </name>
        </person-group>
        <article-title>MedEx: a medication information extraction system for clinical narratives</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2010</year>  
        <volume>17</volume>  
        <issue>1</issue>  
        <fpage>19</fpage>  
        <lpage>24</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=20064797"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M3378</pub-id>
        <pub-id pub-id-type="medline">20064797</pub-id>
        <pub-id pub-id-type="pii">17/1/19</pub-id>
        <pub-id pub-id-type="pmcid">PMC2995636</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
        <source>CLAMP: Clinical Language Annotation, Modeling, and Processing Toolkit</source>  
        <year>2018</year>  
        <access-date>2018-01-18</access-date>
        <publisher-loc>Houston, TX</publisher-loc>
        <publisher-name>The University of Texas Health Science Center at Houston</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://clamp.uth.edu/index.php">http://clamp.uth.edu/index.php</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6wZIVTGfK"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Moon</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>UTHealth at SemEval-2016 Task 12: an end-to-end system for temporal information extraction from clinical notes</article-title>
        <year>2016</year>  
        <conf-name>10th International Workshop on Semantic Evaluation (SemEval-2016)</conf-name>
        <conf-date>June 16-17, 2016</conf-date>
        <conf-loc>San Diego, CA, USA</conf-loc></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>AN</given-names>
          </name>
          <name name-style="western">
            <surname>Lawley</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Hansen</surname>
            <given-names>DP</given-names>
          </name>
          <name name-style="western">
            <surname>Bowman</surname>
            <given-names>RV</given-names>
          </name>
          <name name-style="western">
            <surname>Clarke</surname>
            <given-names>BE</given-names>
          </name>
          <name name-style="western">
            <surname>Duhig</surname>
            <given-names>EE</given-names>
          </name>
          <name name-style="western">
            <surname>Colquist</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Symbolic rule-based classification of lung cancer stages from free-text pathology reports</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2010</year>  
        <volume>17</volume>  
        <issue>4</issue>  
        <fpage>440</fpage>  
        <lpage>5</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20595312"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/jamia.2010.003707</pub-id>
        <pub-id pub-id-type="medline">20595312</pub-id>
        <pub-id pub-id-type="pii">17/4/440</pub-id>
        <pub-id pub-id-type="pmcid">PMC2995652</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Soysal</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Warner</surname>
            <given-names>JL</given-names>
          </name>
          <name name-style="western">
            <surname>Denny</surname>
            <given-names>JC</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Identifying metastases-related information from pathology reports of lung cancer patients</article-title>
        <source>AMIA Jt Summits Transl Sci Proc</source>  
        <year>2017</year>  
        <volume>2017</volume>  
        <fpage>268</fpage>  
        <lpage>277</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28815141"/>
        </comment>  
        <pub-id pub-id-type="medline">28815141</pub-id>
        <pub-id pub-id-type="pmcid">PMC5543353</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Warner</surname>
            <given-names>JL</given-names>
          </name>
          <name name-style="western">
            <surname>Levy</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Neuss</surname>
            <given-names>MN</given-names>
          </name>
        </person-group>
        <article-title>ReCAP: feasibility and accuracy of extracting cancer stage information from narrative electronic health record data</article-title>
        <source>J Oncol Pract</source>  
        <year>2016</year>  
        <month>02</month>  
        <volume>12</volume>  
        <issue>2</issue>  
        <fpage>157</fpage>  
        <lpage>8; e169</lpage>  
        <pub-id pub-id-type="doi">10.1200/JOP.2015.004622</pub-id>
        <pub-id pub-id-type="medline">26306621</pub-id>
        <pub-id pub-id-type="pii">JOP.2015.004622</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>JJ</given-names>
          </name>
          <name name-style="western">
            <surname>Ghasemzadeh</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Hayek</surname>
            <given-names>SS</given-names>
          </name>
          <name name-style="western">
            <surname>Quyyumi</surname>
            <given-names>AA</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Effective information extraction framework for heterogeneous clinical reports using online machine learning and controlled vocabularies</article-title>
        <source>JMIR Med Inform</source>  
        <year>2017</year>  
        <month>05</month>  
        <day>09</day>  
        <volume>5</volume>  
        <issue>2</issue>  
        <fpage>e12</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://medinform.jmir.org/2017/2/e12/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/medinform.7235</pub-id>
        <pub-id pub-id-type="medline">28487265</pub-id>
        <pub-id pub-id-type="pii">v5i2e12</pub-id>
        <pub-id pub-id-type="pmcid">PMC5442348</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <!-- Reference 18: journal citation. The empty ext-link inside the comment element carries the article URL in xlink:href; the xlink prefix is bound on the root article element. -->
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Appin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Brat</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Support patient search on pathology reports with interactive online learning based data extraction</article-title>
          <source>J Pathol Inform</source>
          <year>2015</year>
          <volume>6</volume>
          <fpage>51</fpage>
          <comment>
            <ext-link xlink:type="simple" xlink:href="http://www.jpathinformatics.org/article.asp?issn=2153-3539;year=2015;volume=6;issue=1;spage=51;epage=51;aulast=Zheng"/>
          </comment>
          <pub-id pub-id-type="doi">10.4103/2153-3539.166012</pub-id>
          <pub-id pub-id-type="medline">26605116</pub-id>
          <pub-id pub-id-type="pii">JPI-6-51</pub-id>
          <pub-id pub-id-type="pmcid">PMC4629306</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <!-- Reference 19: journal citation. The page range is given in full (fpage 1590, lpage 1599) rather than with an abbreviated last page. The xlink prefix is bound on the root article element. -->
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>ASLForm: an adaptive self learning medical form generating system</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2013</year>
          <volume>2013</volume>
          <fpage>1590</fpage>
          <lpage>1599</lpage>
          <comment>
            <ext-link xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24551429"/>
          </comment>
          <pub-id pub-id-type="medline">24551429</pub-id>
          <pub-id pub-id-type="pmcid">PMC3900168</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <!-- Reference 20: web citation with no person-group. Both ext-link elements rely on the xlink prefix bound on the root article element. -->
        <!-- NOTE(review): the webcite ext-link's xlink:href is a bare identifier, not a URL; presumably an archive ID resolved by the publisher's renderer. Confirm before changing it. -->
        <label>20</label>
        <nlm-citation citation-type="web">
          <source>Common Terminology Criteria for Adverse Events (CTCAE) version 4.03</source>
          <year>2010</year>
          <month>06</month>
          <day>14</day>
          <access-date>2018-01-18</access-date>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>U.S. Department of Health and Human Services, National Institutes of Health, and National Cancer Institute</publisher-name>
          <comment>
            <ext-link xlink:type="simple" xlink:href="https://evs.nci.nih.gov/ftp1/CTCAE/CTCAE_4.03_2010-06-14_QuickReference_5x7.pdf">https://evs.nci.nih.gov/ftp1/CTCAE/CTCAE_4.03_2010-06-14_QuickReference_5x7.pdf</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6wZIjlhp3"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <!-- Reference 21: book citation with three authors; no article-title, page, or pub-id elements are present. -->
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elliott</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Aggoun</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <source>Hidden Markov Models: Estimation and Control</source>
          <year>1994</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
