<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i7e27955</article-id>
      <article-id pub-id-type="pmid">34287213</article-id>
      <article-id pub-id-type="doi">10.2196/27955</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Automatic Extraction of Lung Cancer Staging Information From Computed Tomography Reports: Deep Learning Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Huang</surname>
            <given-names>Zhengxing</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Tang</surname>
            <given-names>Buzhou</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Su</surname>
            <given-names>Zhaohui</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>Huiling</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Roberts</surname>
            <given-names>Kirk</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>Danqing</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0810-4819</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Huanyao</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2034-0355</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Shaolei</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1122-9292</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Yuhong</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4007-0228</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Nan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3531-1656</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>Xudong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>College of Biomedical Engineering and Instrumental Science</institution>
            <institution>Zhejiang University</institution>
            <addr-line>38 Zheda Road</addr-line>
            <addr-line>Hangzhou, 310027</addr-line>
            <country>China</country>
            <phone>86 13957118891</phone>
            <email>lvxd@zju.edu.cn</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7658-5250</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>College of Biomedical Engineering and Instrumental Science</institution>
        <institution>Zhejiang University</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Key Laboratory for Biomedical Engineering</institution>
        <institution>Ministry of Education</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Thoracic Surgery II</institution>
        <institution>Peking University Cancer Hospital &#38; Institute</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Xudong Lu <email>lvxd@zju.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>21</day>
        <month>7</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>7</issue>
      <elocation-id>e27955</elocation-id>
      <history>
        <date date-type="received">
          <day>15</day>
          <month>2</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>27</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>7</day>
          <month>6</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Danqing Hu, Huanyao Zhang, Shaolei Li, Yuhong Wang, Nan Wu, Xudong Lu. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 21.07.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/7/e27955" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Lung cancer is the leading cause of cancer deaths worldwide. Clinical staging of lung cancer plays a crucial role in making treatment decisions and evaluating prognosis. However, in clinical practice, approximately one-half of the clinical stages of lung cancer patients are inconsistent with their pathological stages. As one of the most important diagnostic modalities for staging, chest computed tomography (CT) provides a wealth of information about cancer staging, but the free-text nature of the CT reports obstructs their computerization.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to automatically extract the staging-related information from CT reports to support accurate clinical staging of lung cancer.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>In this study, we developed an information extraction (IE) system to extract the staging-related information from CT reports. The system consisted of the following three parts: named entity recognition (NER), relation classification (RC), and postprocessing (PP). We first summarized 22 questions about lung cancer staging based on the TNM staging guideline. Next, three state-of-the-art NER algorithms were implemented to recognize the entities of interest. Next, we designed a novel RC method using the relation sign constraint (RSC) to classify the relations between entities. Finally, a rule-based PP module was established to obtain the formatted answers using the results of NER and RC.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We evaluated the developed IE system on a clinical data set containing 392 chest CT reports collected from the Department of Thoracic Surgery II in the Peking University Cancer Hospital. The experimental results showed that the bidirectional encoder representation from transformers (BERT) model outperformed the iterated dilated convolutional neural networks-conditional random field (ID-CNN-CRF) and bidirectional long short-term memory networks-conditional random field (Bi-LSTM-CRF) for NER tasks with macro-F1 scores of 80.97% and 90.06% under the exact and inexact matching schemes, respectively. For the RC task, the proposed RSC showed better performance than the baseline methods. Further, the BERT-RSC model achieved the best performance with a macro-F1 score of 97.13% and a micro-F1 score of 98.37%. Moreover, the rule-based PP module could correctly obtain the formatted results using the extractions of NER and RC, achieving a macro-F1 score of 94.57% and a micro-F1 score of 96.74% for all the 22 questions.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We conclude that the developed IE system can effectively and accurately extract information about lung cancer staging from CT reports. Experimental results show that the extracted results have significant potential for further use in stage verification and prediction to facilitate accurate clinical staging.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>lung cancer</kwd>
        <kwd>clinical staging</kwd>
        <kwd>information extraction</kwd>
        <kwd>named entity recognition</kwd>
        <kwd>relation classification</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Lung cancer is a group of diseases involving abnormal cell growth in the lung tissue with the potential to invade adjoining parts of the body and spread to other organs. It is the most commonly diagnosed cancer and the leading cause of cancer deaths worldwide [<xref ref-type="bibr" rid="ref1">1</xref>], which has been a heavy burden on communities and a critical barrier to increasing life expectancy.</p>
        <p>Clinical staging of lung cancer plays a critical role in making treatment decisions and evaluating prognosis [<xref ref-type="bibr" rid="ref2">2</xref>]. In current clinical practice, clinicians usually decide the clinical staging of lung cancer. Although various advanced diagnostic modalities with high sensitivity and specificity are used by clinical experts, clinical staging still disagrees with pathological staging in approximately one-half of patients, as reported in earlier studies [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Incorrect clinical staging of lung cancer may result in suboptimal treatment decisions, possibly leading to poor outcomes [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>As an indispensable examination technique for lung cancer patients, chest computed tomography (CT) provides a large volume of valuable information about the primary tumor and lymph nodes, which is of paramount importance for clinical staging [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Besides, the reports record the inferences of radiologists about the findings from the images. Although this useful information in the form of natural language is effective and convenient for communication in medical clinical settings, its free-text nature poses difficulties when summarizing or analyzing this information for secondary purposes such as research and quality improvement. Moreover, manually extracting this information is time-consuming and expensive [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>In this study, we aimed to develop an information extraction (IE) system to automatically extract valuable information from CT reports using natural language processing (NLP) techniques to support accurate clinical staging. We first summarized 22 questions about the diagnosis and staging of lung cancer based on the TNM staging guideline [<xref ref-type="bibr" rid="ref8">8</xref>]. Subsequently, 14 types of entities and 4 types of relations were defined to represent the related information in the CT reports. Using the annotated reports, the following three state-of-the-art deep learning named entity recognition (NER) models were developed to label the entities: iterated dilated convolutional neural networks (ID-CNN) [<xref ref-type="bibr" rid="ref9">9</xref>], bidirectional long short-term memory networks (Bi-LSTM) [<xref ref-type="bibr" rid="ref10">10</xref>], and bidirectional encoder representation from transformers (BERT) [<xref ref-type="bibr" rid="ref11">11</xref>]. Next, a novel relation classification (RC) approach using the relation sign constraint (RSC) was proposed to determine the relations between entities. Finally, a rule-based postprocessing (PP) module was developed to obtain the formatted results by analyzing the entities and relations extracted by NER and RC. We empirically evaluated our system using a real clinical data set. Experimental results showed that the system could extract entities and relations as well as obtain the answers to the questions correctly. Using these extracted results, we can verify the clinical staging accuracy and further develop staging prediction models to alleviate the problem of inaccurate clinical staging.</p>
      </sec>
      <sec>
        <title>Related Works</title>
        <p>IE refers to the task of automatically extracting structured semantics (eg, entities, relations, and events) from unstructured text. Cancer information is often extracted from free-text clinical narratives, such as operation notes, radiology reports, and pathology reports, using rule-based, machine learning, or hybrid methods, which have been widely investigated [<xref ref-type="bibr" rid="ref12">12</xref>]. In terms of staging information, most studies have extracted only the clinical or pathological stage statements (eg, Stage I, Stage II, and T3N2) but not detailed phenotypes [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. Besides the stage statements, Savova et al [<xref ref-type="bibr" rid="ref21">21</xref>] and Ping et al [<xref ref-type="bibr" rid="ref22">22</xref>] extracted some tumor-related information such as the location and size. However, these extracted phenotypes are considerably limited in their ability to support staging, particularly for lymph nodes. To support diagnosis and staging, Yim et al [<xref ref-type="bibr" rid="ref23">23</xref>] employed a hybrid method to recognize diverse entities and relations from radiology reports for hepatocellular cancer patients, but without further elaboration on how to exploit the extracted information. Chen et al [<xref ref-type="bibr" rid="ref24">24</xref>] extracted information from various clinical notes including operation notes and CT reports to calculate the Cancer of Liver Italian Program (CLIP) score for hepatocellular cancer patients; however, they provide limited details about the radiology corpus extraction. Bozkurt et al first developed an IE pipeline to extract various types of information from mammography reports [<xref ref-type="bibr" rid="ref25">25</xref>] and then used the extracted features as the inputs for Bayesian networks to predict malignancy of breast cancer [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        <p>These rule-based and conventional machine learning methods have extracted information about cancer successfully, and some of them have exploited the extracted results to provide further diagnosis and staging decision support. Nevertheless, the development of handcrafted features and usage of external resources like the Unified Medical Language System (UMLS) and Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT) are time-consuming and can even result in additional propagation errors [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Recently, with the rapid development of deep neural networks, advanced approaches exhibit excellent performance in many NLP tasks without tedious feature engineering [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]. Furthermore, some researchers began to adopt these advanced techniques to extract cancer information. Si et al [<xref ref-type="bibr" rid="ref34">34</xref>] proposed a frame-based NLP method using Bi-LSTM-conditional random field (Bi-LSTM-CRF) to extract cancer-related information by a two-stage strategy. They first identified the keywords in the sentences to determine their frames and then employed models to label the entities in this frame. Using this strategy, they grouped the related entities by different frames. A limitation of this study is that they only evaluated each process in the pipeline using gold standard annotations separately but did not report the overall results of the pipeline. Gao et al [<xref ref-type="bibr" rid="ref35">35</xref>] proposed a novel hierarchical attention network to predict the primary sites and histological grades of tumors in a text classification manner. Although this approach can directly provide the classification results and show the importance of each word in the text, the scope of the information extracted is considerably limited and insufficient to support cancer diagnosis and staging.</p>
        <p>In this study, we aimed to develop an IE system using deep learning methods to extract information about lung cancer staging from CT reports to better support the accurate clinical staging of lung cancer. Our specific contributions involve (1) defining a group of entity types and relation types to cover a wealth of information about lung cancer staging in CT reports, (2) applying advanced deep learning algorithms to develop the IE system, and (3) evaluating the performance of the IE system in a pipeline manner using real clinical CT reports.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p><xref rid="figure1" ref-type="fig">Figure 1</xref> illustrates the development process of the IE system. First, we annotated the entities and relations in the collected CT reports as the gold standard. Next, the annotated CT reports were used to develop and evaluate the three core parts of the IE system. We also used 50 CT reports to verify the overall performance of the IE system in a pipeline manner. The details of each part are elaborated as follows.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Development process of the information extraction system. BERT: bidirectional encoder representation from transformers; BERT-RC: bidirectional encoder representation from transformers-relation classification; CT: computed tomography; NER: named entity recognition; RC: relation classification.</p>
        </caption>
        <graphic xlink:href="medinform_v9i7e27955_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <sec>
        <title>Data Annotation</title>
        <p>In clinical practice, clinicians usually follow the TNM staging guideline to stage the patients. Therefore, we first analyzed the eighth edition of the lung cancer TNM staging summary and parsed it into 41 questions to determine the scope of staging information (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Note that the staging guideline covers three aspects of lung cancer (ie, tumor [T], lymph node [N], and metastases [M]), with detailed criteria. Chest CT can hardly provide all the information related to lung cancer staging. Clinicians also use other diagnostic modalities like positron emission tomography (PET), magnetic resonance imaging (MRI), and pathological biopsy to stage the patients. Thus, based on the content of the CT reports, 19 questions were identified under the clinician’s guidance. Moreover, we also included 3 questions about the shape, density, and enhancement extent of the tumors. These 3 questions can facilitate the diagnosis of benign and malignant tumors. All 22 questions are listed in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>Based on the questions listed in <xref ref-type="table" rid="table1">Table 1</xref>, we defined 14 types of entities and 4 types of relations to represent the staging-related information in the CT reports. <xref ref-type="table" rid="table2">Table 2</xref> shows the defined entities. <xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates the entity–entity relation map.</p>
        <p>Two medical informatics engineers were recruited to annotate the 392 CT reports by manually following the annotation guideline. The details of the annotation guideline are listed in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. Note that to obtain the annotation guideline, the annotators first independently annotated 10 reports and discussed the discrepancies until a consensus was reached in consultation with clinicians, resulting in a revised annotation guideline. Using the revised guideline, the annotators independently annotated 10 new reports and repeated the above process. In this manner, the guideline was refined by at least five iterations of annotation, discussion, consultation, and amendment, and then finalized. According to the final annotation guideline, we randomly selected 100 reports for annotation by both annotators to measure the interannotator agreement using the kappa statistic [<xref ref-type="bibr" rid="ref36">36</xref>]. The remaining 292 reports were each annotated by only one of the two annotators. The BIO labeling scheme was employed to annotate the data. We employed brat [<xref ref-type="bibr" rid="ref37">37</xref>] as the annotation tool. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows an example of the annotated CT reports.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Questions about lung cancer diagnosis and staging<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="50"/>
            <col width="760"/>
            <col width="120"/>
            <col width="70"/>
            <thead>
              <tr valign="bottom">
                <td>No.</td>
                <td>Question</td>
                <td>Type of answer</td>
                <td>Stage</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Whether the tumor can be visualized by imaging or bronchoscopy?</td>
                <td>Yes/No</td>
                <td>TX</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>What is the greatest dimension of the tumor?</td>
                <td>Numerical</td>
                <td>T1-4</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Whether the tumor invades the lobar bronchus?</td>
                <td>Yes/No</td>
                <td>T1</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Whether the tumor invades the visceral pleura?</td>
                <td>Yes/No</td>
                <td>T2</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Whether there is an atelectasis or obstructive pneumonitis that extends to the hilar region, either involving part of the lung or the entire lung?</td>
                <td>Yes/No</td>
                <td>T2</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Whether there is (are) associated separate tumor nodule(s) in the same lobe as the primary?</td>
                <td>Yes/No</td>
                <td>T3</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Whether the tumor invades the great vessels?</td>
                <td>Yes/No</td>
                <td>T4</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Whether the tumor invades the vertebral body?</td>
                <td>Yes/No</td>
                <td>T4</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Whether there is (are) separate tumor nodule(s) in a different ipsilateral lobe to that of the primary?</td>
                <td>Yes/No</td>
                <td>T4</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Whether there is regional lymph node metastasis?</td>
                <td>Yes/No</td>
                <td>N0</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>Whether there is metastasis in ipsilateral hilar lymph nodes, including involvement by direct extension?</td>
                <td>Yes/No</td>
                <td>N1</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>Whether there is metastasis in ipsilateral mediastinal lymph nodes?</td>
                <td>Yes/No</td>
                <td>N2</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>Whether there is metastasis in subcarinal lymph nodes?</td>
                <td>Yes/No</td>
                <td>N2</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>Whether there is metastasis in contralateral mediastinal lymph nodes?</td>
                <td>Yes/No</td>
                <td>N3</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>Whether there is metastasis in contralateral hilar lymph nodes?</td>
                <td>Yes/No</td>
                <td>N3</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>Whether there is metastasis in supraclavicular lymph nodes?</td>
                <td>Yes/No</td>
                <td>N3</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>Whether there is (are) separate tumor nodule(s) in a contralateral lobe?</td>
                <td>Yes/No</td>
                <td>M1a</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>Whether the tumor is accompanied by pleural nodules?</td>
                <td>Yes/No</td>
                <td>M1a</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>Whether there is malignant pleural or pericardial effusion?</td>
                <td>Yes/No</td>
                <td>M1a</td>
              </tr>
              <tr valign="top">
                <td>20<sup>b</sup></td>
                <td>What is the shape of the tumor?</td>
                <td>Text</td>
                <td>NA</td>
              </tr>
              <tr valign="top">
                <td>21<sup>b</sup></td>
                <td>What is the density of the tumor?</td>
                <td>Text</td>
                <td>NA</td>
              </tr>
              <tr valign="top">
                <td>22<sup>b</sup></td>
                <td>What is the enhancement extent of the tumor?</td>
                <td>Text</td>
                <td>NA</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>The stages are based on the eighth edition of the lung cancer TNM staging summary.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>The questions are not used for staging but are important for diagnosis of benign and malignant tumors.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Types of entities with descriptions and instances.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="400"/>
            <col width="400"/>
            <thead>
              <tr valign="top">
                <td>Entity type</td>
                <td>Description</td>
                <td>Instance</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Mass</td>
                <td>Suspected mass/nodule/lesion in the lung</td>
                <td>肿物 (mass)</td>
              </tr>
              <tr valign="top">
                <td>Lymph node</td>
                <td>Suspected lymph node metastasis</td>
                <td>肿大淋巴结 (enlarged lymph node)</td>
              </tr>
              <tr valign="top">
                <td>Location</td>
                <td>Location of mass or lymph node</td>
                <td>左上肺右基底段 (right basal segment of the upper left lung)</td>
              </tr>
              <tr valign="top">
                <td>Size</td>
                <td>Size of mass or lymph node</td>
                <td>25×22 mm</td>
              </tr>
              <tr valign="top">
                <td>Negation</td>
                <td>Negative words</td>
                <td>未见 (unseen)</td>
              </tr>
              <tr valign="top">
                <td>Density</td>
                <td>Density of mass</td>
                <td>磨玻璃密度 (ground glass density)</td>
              </tr>
              <tr valign="top">
                <td>Enhancement</td>
                <td>Enhancement extent of mass</td>
                <td>强化明显 (significant enhancement)</td>
              </tr>
              <tr valign="top">
                <td>Shape</td>
                <td>Shape of mass</td>
                <td>边缘见毛刺 (spiculate boundary)</td>
              </tr>
              <tr valign="top">
                <td>Bronchus</td>
                <td>Description of bronchial invasion</td>
                <td>支气管狭窄 (bronchial stenosis)</td>
              </tr>
              <tr valign="top">
                <td>Pleura</td>
                <td>Description of pleural invasion or metastasis</td>
                <td>胸膜凹陷 (pleural indentation)</td>
              </tr>
              <tr valign="top">
                <td>Vessel</td>
                <td>Description of great vessel invasion</td>
                <td>包绕左肺动脉 (surrounds the left pulmonary artery)</td>
              </tr>
              <tr valign="top">
                <td>Vertebral body</td>
                <td>Description of vertebral body invasion</td>
                <td>椎体见骨质破坏 (bone destruction seen in the vertebral body)</td>
              </tr>
              <tr valign="top">
                <td>Effusion</td>
                <td>Description of pleural or pericardial effusion</td>
                <td>心包积液 (pericardial effusion)</td>
              </tr>
              <tr valign="top">
                <td>PAOP<sup>a</sup></td>
                <td>Description of pulmonary atelectasis or obstructive pneumonitis</td>
                <td>肺组织不张 (atelectasis)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>PAOP: pulmonary atelectasis/obstructive pneumonitis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Entity–entity relation map for extracting lung cancer staging information. PAOP: pulmonary atelectasis/obstructive pneumonitis.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e27955_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Annotated computed tomography report based on the annotation guideline.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e27955_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Word Embedding</title>
        <p>As an unsupervised feature representation technique, word embedding maps the words to vectors of real values to capture the semantic and syntactic information from the corpus. In this study, we adopted the word embedding technique pretrained on the Chinese Wikipedia corpus using word2vec [<xref ref-type="bibr" rid="ref38">38</xref>] for conventional CNN and recurrent neural network (RNN) models. Note that unlike English, Chinese words can be composed of multiple characters but with no space appearing between words. To incorporate the word segmentation information into the NER task, we first used jieba [<xref ref-type="bibr" rid="ref39">39</xref>], a well-known Chinese text segmentation toolkit, to segment the sentence. Then, we used the randomly initialized real-value vectors to represent whether a character is the first, middle, or last character of the segmented word as in the segmentation embedding. For BERT, we used the default vocabulary to map the tokens to natural numbers.</p>
      </sec>
      <sec>
        <title>NER Process</title>
        <p>NER is an essential technique to identify the types and boundaries of the entities of interest, which can drive other NLP tasks [<xref ref-type="bibr" rid="ref40">40</xref>-<xref ref-type="bibr" rid="ref43">43</xref>]. Recently developed deep learning NER methods exhibit more powerful performances than the traditional methods without tedious feature engineering [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. In this study, we selected ID-CNN-CRF, Bi-LSTM-CRF, and BERT to recognize the entities.</p>
        <p>ID-CNN is an advanced algorithm extending from the dilated CNN [<xref ref-type="bibr" rid="ref45">45</xref>]. Instead of simply increasing the depth of a stacked dilated CNN, the ID-CNN applies the same small stack of dilated convolutions multiple times, with each iteration taking the result of the last application as the input to incorporate global information from a whole sentence and alleviate the overfitting problem. Bi-LSTM is another deep learning method using the recurrent neural network architecture that can capture the long-distance dependencies of context from both sides of the sequence and alleviate gradient vanishing or explosion during entity recognition from clinical text. A CRF layer was also employed on the ID-CNN and Bi-LSTM models, as it can exploit the relation constraints among different labels to find the optimal label path for sequence labeling tasks.</p>
        <p>BERT is a novel language representation model pretrained on a large corpus using bidirectional transformers [<xref ref-type="bibr" rid="ref46">46</xref>]. Unlike the traditional embedding methods that can only represent a word with polysemy using one fixed vector, BERT can dynamically adjust the representation depending on the context of the word. It can also be easily fine-tuned to adapt to specific tasks, such as NER, RC, and question answering, and it has shown more powerful performance than conventional CNN and RNN models.</p>
      </sec>
      <sec>
        <title>RC Process</title>
        <p>RC is the task of finding semantic relations between pairs of entities, which can group the relevant entities together to generate richer semantics [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. Although traditional RC methods have achieved satisfactory performance [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>], deep learning RC methods obtained better results and provided an effective way to alleviate the problem of handcrafted features [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. In this study, we selected attention-based bidirectional long short-term memory networks (Attention-Bi-LSTM) [<xref ref-type="bibr" rid="ref32">32</xref>] and BERT to classify the relations between entities.</p>
        <p>Note that in this study, two entities in a sentence can only have one type of relation or no relation depending on the definition in <xref rid="figure2" ref-type="fig">Figure 2</xref>. For instance, the relation between a lymph node entity and a location entity may be At or NoRelation, but definitely not a SizeOf relation. This information is useful for simplifying the multiclassification problem into a binary classification problem. We propose a novel approach, namely RSC, to use this extra information for RC. Before using the original sentence for relation classification, we first added the tags, namely At, SizeOf, Negate, Related, and NoRelation, at the beginning of the sentence (eg, “At&#60;e1&#62;左肺门及纵隔4、5组&#60;/e1&#62;见&#60;e2&#62;肿大淋巴结&#60;/e2&#62;，较大约14×12mm.”). The added At tag is determined based on the two target entity types (location and lymph node). Then, the sentence with the tag can be input into the RC model. Using this method, we can simply incorporate the entity–entity relation constraints into the model to improve the prediction performance.</p>
      </sec>
      <sec>
        <title>PP Step</title>
        <p>To obtain the answers to the questions listed in <xref ref-type="table" rid="table1">Table 1</xref>, it is not enough to directly use the extracted triples (entity 1–relation–entity 2), and further analysis is needed. For example, to answer the question on whether there is metastasis in ipsilateral mediastinal lymph nodes, we first need to know whether there exist a primary tumor and a mediastinal lymph node metastasis for this patient, and then determine the relative position of these two. In this study, we developed a rule-based PP module to process the extracted triples by the NER and RC models. The PP step is presented in <xref rid="figure4" ref-type="fig">Figure 4</xref> and the rules are listed in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Postprocessing steps.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e27955_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <p>To evaluate the performance of the models, we used the precision, recall, and F1 score as the evaluation metrics. Moreover, we also employed the microaverages and macroaverages for overall performance evaluation. The corresponding formulations are listed in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Annotation Results</title>
        <p>A total of 392 chest CT reports of lung cancer patients were collected from the Department of Thoracic Surgery II in the Peking University Cancer Hospital. Two medical informatics engineers were recruited to annotate the entities and relations based on the annotation guideline. The statistics of the annotations are summarized in <xref ref-type="table" rid="table3">Tables 3</xref> and <xref ref-type="table" rid="table4">4</xref>. We had both the engineers annotate 100 CT reports to calculate the interannotator agreement, and the κ values were 0.937 for the entity annotation and 0.946 for the relation annotation, indicating the reliability of the annotation. Prior approval was obtained from the Ethics Committee of the Peking University Cancer Hospital to conduct this study.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Statistics of annotated named entities.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Entity</td>
                <td>Annotated entities, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Mass</td>
                <td>767</td>
              </tr>
              <tr valign="top">
                <td>Lymph node</td>
                <td>492</td>
              </tr>
              <tr valign="top">
                <td>Location</td>
                <td>1748</td>
              </tr>
              <tr valign="top">
                <td>Size</td>
                <td>699</td>
              </tr>
              <tr valign="top">
                <td>Negation</td>
                <td>808</td>
              </tr>
              <tr valign="top">
                <td>Density</td>
                <td>147</td>
              </tr>
              <tr valign="top">
                <td>Enhancement</td>
                <td>146</td>
              </tr>
              <tr valign="top">
                <td>Shape</td>
                <td>437</td>
              </tr>
              <tr valign="top">
                <td>Bronchus</td>
                <td>124</td>
              </tr>
              <tr valign="top">
                <td>Pleura</td>
                <td>262</td>
              </tr>
              <tr valign="top">
                <td>Vessel</td>
                <td>41</td>
              </tr>
              <tr valign="top">
                <td>Vertebral body</td>
                <td>25</td>
              </tr>
              <tr valign="top">
                <td>Effusion</td>
                <td>363</td>
              </tr>
              <tr valign="top">
                <td>PAOP</td>
                <td>78</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Statistics of annotated relations.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Relation</td>
                <td>Annotated relations, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>At</td>
                <td>1811</td>
              </tr>
              <tr valign="top">
                <td>SizeOf</td>
                <td>683</td>
              </tr>
              <tr valign="top">
                <td>Related</td>
                <td>988</td>
              </tr>
              <tr valign="top">
                <td>Negate</td>
                <td>803</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>NER Results</title>
        <p>To train and evaluate the NER models, we randomly separated 70% of the CT reports as the training set, 10% as the validation set, and 20% as the test set. The early stopping strategy was used on the validation set to avoid the overfitting problem. The hyperparameters used in this study are listed in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. We repeated the entire training and evaluation process five times to reduce the possible bias that may be caused by data partitioning.</p>
        <p><xref ref-type="table" rid="table5">Table 5</xref> and <xref rid="figure5" ref-type="fig">Figure 5</xref> show the results of the NER models. As shown in <xref ref-type="table" rid="table5">Table 5</xref>, the BERT model achieves the best overall performance with a macro-F1 score of 80.97% and a micro-F1 score of 88.5%. We can notice that the entities with several annotations or plain descriptions (eg, “Lymph Node,” “Negation,” “Size,” and “Effusion”) obtain satisfactory results with F1 scores greater than 90%. However, performances degraded for the entities with a small number of annotations or diverse descriptions (eg, “Shape,” “Pleura,” “Vessel,” “Vertebral Body,” and “PAOP”). <xref rid="figure5" ref-type="fig">Figure 5</xref> shows the results in a more intuitive manner with standard deviations.</p>
        <p>By further analyzing the extractions, we found that most of the errors were due to an inexact match, where a predicted entity overlapped with the gold standard. For example, the predicted entity “余 (B-Location)肺 (I-Location)内 (O)” is an inexact match for the gold standard annotation “余 (B-Location)肺 (I-Location)内 (I-Location).” Although these extractions could not cover the gold standard exactly, the partially matched entities still contained useful information for RC and PP. We also calculated the inexact matching performances for each type of entity and have presented them in <xref ref-type="table" rid="table6">Table 6</xref> and <xref rid="figure6" ref-type="fig">Figure 6</xref>.</p>
        <p>As shown in <xref ref-type="table" rid="table6">Table 6</xref>, the macro-F1 scores of ID-CNN-CRF, Bi-LSTM-CRF, and BERT using the inexact metrics are 89.6%, 89.96%, and 90.06%, which obtain improvements of 13.93%, 12.69%, and 9.09% compared with the exact metrics, respectively. Furthermore, the micro-F1 scores of the inexact metrics are all above 94%. Almost all the entities obtain better extraction results under the inexact matching scheme, especially those entities with diverse descriptions, which indicates that the extractions cover most of the annotations.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Performance of the named entity recognition models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="100"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="80"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Entity</td>
                <td colspan="3">ID-CNN-CRF<sup>a</sup></td>
                <td colspan="3">Bi-LSTM-CRF<sup>b</sup></td>
                <td colspan="3">BERT<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Mass</td>
                <td>83.11</td>
                <td>87.79</td>
                <td>85.35</td>
                <td>83.86</td>
                <td>88.02</td>
                <td>85.88</td>
                <td>87.92</td>
                <td>86.05</td>
                <td>87.61</td>
              </tr>
              <tr valign="top">
                <td>Lymph node</td>
                <td>92.29</td>
                <td>95.42</td>
                <td>93.83</td>
                <td>93.29</td>
                <td>94.79</td>
                <td>94.04</td>
                <td>91.52</td>
                <td>93.07</td>
                <td>92.27</td>
              </tr>
              <tr valign="top">
                <td>Location</td>
                <td>84.85</td>
                <td>87.40</td>
                <td>86.1</td>
                <td>86.99</td>
                <td>89.3</td>
                <td>88.12</td>
                <td>87.93</td>
                <td>86.99</td>
                <td>87.44</td>
              </tr>
              <tr valign="top">
                <td>Size</td>
                <td>91.6</td>
                <td>95</td>
                <td>93.24</td>
                <td>92.29</td>
                <td>94.92</td>
                <td>93.56</td>
                <td>94.03</td>
                <td>94.44</td>
                <td>94.22</td>
              </tr>
              <tr valign="top">
                <td>Negation</td>
                <td>97.66</td>
                <td>98.45</td>
                <td>98.02</td>
                <td>97.77</td>
                <td>98.79</td>
                <td>98.26</td>
                <td>99.12</td>
                <td>99.11</td>
                <td>99.11</td>
              </tr>
              <tr valign="top">
                <td>Density</td>
                <td>64.16</td>
                <td>69.66</td>
                <td>66.61</td>
                <td>68.4</td>
                <td>71.47</td>
                <td>69.73</td>
                <td>75.55</td>
                <td>68.49</td>
                <td>71.75</td>
              </tr>
              <tr valign="top">
                <td>Enhancement</td>
                <td>74.48</td>
                <td>81.04</td>
                <td>77.47</td>
                <td>74.33</td>
                <td>78.4</td>
                <td>76.14</td>
                <td>81.39</td>
                <td>75.03</td>
                <td>77.69</td>
              </tr>
              <tr valign="top">
                <td>Shape</td>
                <td>82.65</td>
                <td>83.85</td>
                <td>83.21</td>
                <td>78.95</td>
                <td>83.38</td>
                <td>81</td>
                <td>82.72</td>
                <td>81.8</td>
                <td>82.2</td>
              </tr>
              <tr valign="top">
                <td>Bronchus</td>
                <td>66.45</td>
                <td>67.96</td>
                <td>67.11</td>
                <td>62.57</td>
                <td>69.55</td>
                <td>65.66</td>
                <td>74.17</td>
                <td>76.88</td>
                <td>75.1</td>
              </tr>
              <tr valign="top">
                <td>Pleura</td>
                <td>81.48</td>
                <td>79.39</td>
                <td>80.36</td>
                <td>83.54</td>
                <td>83.28</td>
                <td>83.39</td>
                <td>84.59</td>
                <td>77.13</td>
                <td>80.21</td>
              </tr>
              <tr valign="top">
                <td>Vessel</td>
                <td>37.52</td>
                <td>41.59</td>
                <td>39.05</td>
                <td>44.5</td>
                <td>43.13</td>
                <td>43.27</td>
                <td>68.09</td>
                <td>54.53</td>
                <td>58.51</td>
              </tr>
              <tr valign="top">
                <td>Vertebral body</td>
                <td>36.43</td>
                <td>60.17</td>
                <td>42.75</td>
                <td>46.52</td>
                <td>67.5</td>
                <td>53.17</td>
                <td>82</td>
                <td>66.67</td>
                <td>72.24</td>
              </tr>
              <tr valign="top">
                <td>Effusion</td>
                <td>97.02</td>
                <td>97.25</td>
                <td>97.11</td>
                <td>95.77</td>
                <td>97.3</td>
                <td>96.51</td>
                <td>98.32</td>
                <td>97.25</td>
                <td>97.78</td>
              </tr>
              <tr valign="top">
                <td>PAOP<sup>d</sup></td>
                <td>47.67</td>
                <td>51.33</td>
                <td>49.11</td>
                <td>50.28</td>
                <td>57.25</td>
                <td>53.04</td>
                <td>65.86</td>
                <td>53.2</td>
                <td>57.46</td>
              </tr>
              <tr valign="top">
                <td>Macroaverage</td>
                <td>74.1</td>
                <td>78.31</td>
                <td>75.67</td>
                <td>75.65</td>
                <td>79.79</td>
                <td>77.27</td>
                <td>83.8</td>
                <td>79.33</td>
                <td>80.97</td>
              </tr>
              <tr valign="top">
                <td>Microaverage</td>
                <td>85.85</td>
                <td>88.41</td>
                <td>87.11</td>
                <td>86.56</td>
                <td>89.32</td>
                <td>87.92</td>
                <td>89.28</td>
                <td>87.78</td>
                <td>88.5</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>ID-CNN-CRF: iterated dilated convolutional neural networks-conditional random field.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>Bi-LSTM-CRF: bidirectional long short-term memory networks-conditional random field.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>BERT: bidirectional encoder representation from transformers.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>PAOP: pulmonary atelectasis/obstructive pneumonitis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>F1 scores with bars showing the standard deviations of the named entity recognition models. Bi-LSTM-CRF: bidirectional long short-term memory networks-conditional random field; BERT: bidirectional encoder representation from transformers; ID-CNN-CRF: iterated dilated convolutional neural networks-conditional random field; PAOP: pulmonary atelectasis/obstructive pneumonitis.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e27955_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Performance of the named entity recognition models calculated using the inexact matching scheme.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="0"/>
            <col width="100"/>
            <col width="80"/>
            <col width="100"/>
            <col width="0"/>
            <col width="100"/>
            <col width="80"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Entity</td>
                <td colspan="4">ID-CNN-CRF<sup>a</sup></td>
                <td colspan="4">Bi-LSTM-CRF<sup>b</sup></td>
                <td colspan="3">BERT<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
                <td colspan="2">Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
                <td colspan="2">Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Mass</td>
                <td>89.78</td>
                <td>94.81</td>
                <td>92.19</td>
                <td colspan="2">90.71</td>
                <td>95.2</td>
                <td>92.89</td>
                <td colspan="2">94.11</td>
                <td>92.05</td>
                <td>93.02</td>
              </tr>
              <tr valign="top">
                <td>Lymph node</td>
                <td>97.11</td>
                <td>100.42</td>
                <td>98.73</td>
                <td colspan="2">97.43</td>
                <td>99</td>
                <td>98.2</td>
                <td colspan="2">97.8</td>
                <td>99.41</td>
                <td>98.59</td>
              </tr>
              <tr valign="top">
                <td>Location</td>
                <td>91.88</td>
                <td>94.66</td>
                <td>93.24</td>
                <td colspan="2">92.73</td>
                <td>95.2</td>
                <td>93.95</td>
                <td colspan="2">95.01</td>
                <td>93.99</td>
                <td>94.47</td>
              </tr>
              <tr valign="top">
                <td>Size</td>
                <td>95.33</td>
                <td>98.9</td>
                <td>97.06</td>
                <td colspan="2">96.28</td>
                <td>99.06</td>
                <td>97.62</td>
                <td colspan="2">96.58</td>
                <td>97.03</td>
                <td>96.79</td>
              </tr>
              <tr valign="top">
                <td>Negation</td>
                <td>97.66</td>
                <td>98.45</td>
                <td>98.02</td>
                <td colspan="2">97.77</td>
                <td>98.79</td>
                <td>98.26</td>
                <td colspan="2">99.12</td>
                <td>99.11</td>
                <td>99.11</td>
              </tr>
              <tr valign="top">
                <td>Density</td>
                <td>84.09</td>
                <td>90.53</td>
                <td>86.95</td>
                <td colspan="2">82.39</td>
                <td>86.13</td>
                <td>84.01</td>
                <td colspan="2">94.48</td>
                <td>85.49</td>
                <td>89.64</td>
              </tr>
              <tr valign="top">
                <td>Enhancement</td>
                <td>85.64</td>
                <td>93.26</td>
                <td>89.11</td>
                <td colspan="2">86.79</td>
                <td>92.3</td>
                <td>89.25</td>
                <td colspan="2">91.53</td>
                <td>85.03</td>
                <td>87.73</td>
              </tr>
              <tr valign="top">
                <td>Shape</td>
                <td>91.59</td>
                <td>92.92</td>
                <td>92.22</td>
                <td colspan="2">88.76</td>
                <td>93.94</td>
                <td>91.16</td>
                <td colspan="2">92.23</td>
                <td>91.12</td>
                <td>91.6</td>
              </tr>
              <tr valign="top">
                <td>Bronchus</td>
                <td>83.20</td>
                <td>85.03</td>
                <td>84.01</td>
                <td colspan="2">79.4</td>
                <td>89.19</td>
                <td>83.73</td>
                <td colspan="2">84.76</td>
                <td>87.76</td>
                <td>85.75</td>
              </tr>
              <tr valign="top">
                <td>Pleura</td>
                <td>93.07</td>
                <td>90.44</td>
                <td>91.67</td>
                <td colspan="2">92.86</td>
                <td>92.54</td>
                <td>92.67</td>
                <td colspan="2">93.12</td>
                <td>85.7</td>
                <td>88.73</td>
              </tr>
              <tr valign="top">
                <td>Vessel</td>
                <td>81.29</td>
                <td>79.18</td>
                <td>79.09</td>
                <td colspan="2">84.66</td>
                <td>73.82</td>
                <td>77.52</td>
                <td colspan="2">89.03</td>
                <td>67.5</td>
                <td>75.58</td>
              </tr>
              <tr valign="top">
                <td>Vertebral body</td>
                <td>63.81</td>
                <td>92.5</td>
                <td>71.76</td>
                <td colspan="2">65.76</td>
                <td>86.67</td>
                <td>72.28</td>
                <td colspan="2">92</td>
                <td>73.33</td>
                <td>80.24</td>
              </tr>
              <tr valign="top">
                <td>Effusion</td>
                <td>98.4</td>
                <td>98.64</td>
                <td>98.5</td>
                <td colspan="2">98.18</td>
                <td>99.74</td>
                <td>98.93</td>
                <td colspan="2">100</td>
                <td>98.91</td>
                <td>99.45</td>
              </tr>
              <tr valign="top">
                <td>PAOP<sup>d</sup></td>
                <td>80.1</td>
                <td>84.64</td>
                <td>81.84</td>
                <td colspan="2">84.23</td>
                <td>96.39</td>
                <td>89.03</td>
                <td colspan="2">90.23</td>
                <td>74.99</td>
                <td>80.13</td>
              </tr>
              <tr valign="top">
                <td>Macroaverage</td>
                <td>88.07</td>
                <td>92.46</td>
                <td>89.6</td>
                <td colspan="2">88.43</td>
                <td>92.71</td>
                <td>89.96</td>
                <td colspan="2">93.57</td>
                <td>87.96</td>
                <td>90.06</td>
              </tr>
              <tr valign="top">
                <td>Microaverage</td>
                <td>92.66</td>
                <td>95.42</td>
                <td>94.01</td>
                <td colspan="2">92.87</td>
                <td>95.84</td>
                <td>94.32</td>
                <td colspan="2">95.39</td>
                <td>93.81</td>
                <td>94.57</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>ID-CNN-CRF: iterated dilated convolutional neural networks-conditional random field.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>Bi-LSTM-CRF: bidirectional long short-term memory networks-conditional random field.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>BERT: bidirectional encoder representation from transformers.</p>
            </fn>
            <fn id="table6fn4">
              <p><sup>d</sup>PAOP: pulmonary atelectasis/obstructive pneumonitis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Inexactly matching F1 scores with bars showing the standard deviations of the named entity recognition models. Bi-LSTM-CRF: bidirectional long short-term memory networks-conditional random field; BERT: bidirectional encoder representation from transformers; ID-CNN-CRF: iterated dilated convolutional neural networks-conditional random field; PAOP: pulmonary atelectasis/obstructive pneumonitis.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e27955_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>RC Results</title>
        <p>To evaluate the proposed RC method, the data set was randomly separated such that 70%, 10%, and 20% of the CT reports were used as the training, validation, and test sets, respectively. Attention-Bi-LSTM and BERT were selected as the baselines. The annotated entities were provided in this step for evaluating the performance of the RC models. The hyperparameters of the RC models are listed in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. We also repeated the entire training and evaluation process five times with different random seeds to alleviate the possible bias caused by data partitioning.</p>
        <p><xref ref-type="table" rid="table7">Table 7</xref> and <xref rid="figure7" ref-type="fig">Figure 7</xref> show the experimental results of the RC models. As depicted in <xref ref-type="table" rid="table7">Table 7</xref>, all four models achieve excellent performances with macro-F1 values above 95% and micro-F1 values above 97%. Comparing the baseline and proposed methods indicates that the RSC improves the performances of both the baseline models, especially for the Related relation. Moreover, the BERT-RSC achieves the best performance among all the models.</p>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Performance of the proposed and baseline relation classification models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="110"/>
            <col width="80"/>
            <col width="70"/>
            <col width="70"/>
            <col width="0"/>
            <col width="80"/>
            <col width="70"/>
            <col width="70"/>
            <col width="0"/>
            <col width="80"/>
            <col width="70"/>
            <col width="70"/>
            <col width="0"/>
            <col width="80"/>
            <col width="70"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>Relation</td>
                <td colspan="8">Baseline</td>
                <td colspan="7">Proposed</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Attention-Bi-LSTM<sup>a</sup></td>
                <td colspan="4">BERT<sup>b</sup></td>
                <td colspan="4">Attention-Bi-LSTM-RSC<sup>c</sup></td>
                <td colspan="3">BERT-RSC<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
                <td colspan="2">Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
                <td colspan="2">Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
                <td colspan="2">Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>At</td>
                <td>96.02</td>
                <td>94.47</td>
                <td>95.23</td>
                <td colspan="2">96.3</td>
                <td>95.39</td>
                <td>95.83</td>
                <td colspan="2">96.25</td>
                <td>94.79</td>
                <td>95.5</td>
                <td colspan="2">96.95</td>
                <td>95.55</td>
                <td>96.23</td>
              </tr>
              <tr valign="top">
                <td>SizeOf</td>
                <td>97.05</td>
                <td>97.51</td>
                <td>97.27</td>
                <td colspan="2">98.13</td>
                <td>97.35</td>
                <td>97.73</td>
                <td colspan="2">97.19</td>
                <td>98.1</td>
                <td>97.61</td>
                <td colspan="2">98.11</td>
                <td>98.42</td>
                <td>98.25</td>
              </tr>
              <tr valign="top">
                <td>Related</td>
                <td>88.17</td>
                <td>91.47</td>
                <td>89.65</td>
                <td colspan="2">85.22</td>
                <td>94.7</td>
                <td>89.64</td>
                <td colspan="2">88.95</td>
                <td>92.31</td>
                <td>90.55</td>
                <td colspan="2">89.17</td>
                <td>96.27</td>
                <td>92.56</td>
              </tr>
              <tr valign="top">
                <td>Negate</td>
                <td>98.7</td>
                <td>97.07</td>
                <td>97.87</td>
                <td colspan="2">99.38</td>
                <td>99.63</td>
                <td>99.5</td>
                <td colspan="2">99.33</td>
                <td>97.82</td>
                <td>98.56</td>
                <td colspan="2">99.38</td>
                <td>99.74</td>
                <td>99.56</td>
              </tr>
              <tr valign="top">
                <td>NoRelation</td>
                <td>98.7</td>
                <td>98.67</td>
                <td>98.68</td>
                <td colspan="2">99.11</td>
                <td>98.57</td>
                <td>98.84</td>
                <td colspan="2">98.83</td>
                <td>98.77</td>
                <td>98.8</td>
                <td colspan="2">99.22</td>
                <td>98.87</td>
                <td>99.05</td>
              </tr>
              <tr valign="top">
                <td>Macroaverage</td>
                <td>95.73</td>
                <td>95.84</td>
                <td>95.74</td>
                <td colspan="2">95.63</td>
                <td>97.13</td>
                <td>96.31</td>
                <td colspan="2">96.11</td>
                <td>96.36</td>
                <td>96.2</td>
                <td colspan="2">96.57</td>
                <td>97.77</td>
                <td>97.13</td>
              </tr>
              <tr valign="top">
                <td>Microaverage</td>
                <td>97.88</td>
                <td>97.88</td>
                <td>97.88</td>
                <td colspan="2">98.11</td>
                <td>98.11</td>
                <td>98.11</td>
                <td colspan="2">98.08</td>
                <td>98.08</td>
                <td>98.08</td>
                <td colspan="2">98.48</td>
                <td>98.48</td>
                <td>98.48</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>Attention-Bi-LSTM: attention-based bidirectional long short-term memory networks.</p>
            </fn>
            <fn id="table7fn2">
              <p><sup>b</sup>BERT: bidirectional encoder representation from transformers.</p>
            </fn>
            <fn id="table7fn3">
              <p><sup>c</sup>Attention-Bi-LSTM-RSC: attention-based bidirectional long short-term memory-relation sign constraint.</p>
            </fn>
            <fn id="table7fn4">
              <p><sup>d</sup>BERT-RSC: bidirectional encoder representation from transformers-relation sign constraint.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>F1 scores with bars showing the standard deviations of the relation classification models. Attention-Bi-LSTM: attention-based bidirectional long short-term memory networks; Attention-Bi-LSTM-RSC: attention-based bidirectional long short-term memory-relation sign constraint; BERT: bidirectional encoder representation from transformers; BERT-RSC: bidirectional encoder representation from transformers-relation sign constraint.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e27955_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>PP Results</title>
        <p>Based on the experimental results presented above, we selected the BERT model for NER and RC. Note that instead of using the annotated data, we directly used the output of the NER model as the input for RC and employed the PP module to analyze the triples extracted by NER and RC to verify the performance of the IE system. We randomly selected 50 reports, for which both the annotators manually answered the 22 questions. <xref ref-type="table" rid="table8">Table 8</xref> shows the number of positive answers annotated to each question in the 50 reports and the experimental results of the IE system for each question. The experimental results prove that the IE system achieves a macro-F1 score of 94.57% and a micro-F1 score of 96.74%, indicating that the system can effectively extract information related to lung cancer staging from CT reports.</p>
        <p>By analyzing the incorrect answers, we found that the main reason for inaccurate extraction was that some entities or relations were not recognized by the system. For example, missing “Mass” entities or “At” relations made it impossible to determine the relative position between the primary tumor and other nodules, resulting in low recall values of Q6, Q9, and Q18. Besides, missing “Bronchus,” “PAOP,” “Vessel,” “Density,” and “Enhancement” entities led to low recall values of Q3, Q5, Q7, Q21, and Q22 when relevant descriptions were inherently scarce.</p>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Experimental results of the developed information extraction system.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>No.</td>
                <td>Number of positive answers annotated</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>50</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>47</td>
                <td>97.83</td>
                <td>95.74</td>
                <td>96.77</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>16</td>
                <td>100</td>
                <td>87.50</td>
                <td>93.33</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>27</td>
                <td>100</td>
                <td>96.3</td>
                <td>98.11</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>6</td>
                <td>83.33</td>
                <td>83.33</td>
                <td>83.33</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>17</td>
                <td>100</td>
                <td>82.35</td>
                <td>90.32</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>5</td>
                <td>100</td>
                <td>80</td>
                <td>88.89</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>2</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>14</td>
                <td>100</td>
                <td>85.71</td>
                <td>92.31</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>28</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>18</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>22</td>
                <td>95.45</td>
                <td>95.45</td>
                <td>95.45</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>1</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>19</td>
                <td>95</td>
                <td>100</td>
                <td>97.44</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>6</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>5</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>20</td>
                <td>95</td>
                <td>95</td>
                <td>95</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>5</td>
                <td>80</td>
                <td>80</td>
                <td>80</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>2</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>28</td>
                <td>96.3</td>
                <td>92.86</td>
                <td>94.55</td>
              </tr>
              <tr valign="top">
                <td>21</td>
                <td>14</td>
                <td>100</td>
                <td>85.71</td>
                <td>92.31</td>
              </tr>
              <tr valign="top">
                <td>22</td>
                <td>15</td>
                <td>85.71</td>
                <td>80</td>
                <td>82.76</td>
              </tr>
              <tr valign="top">
                <td>Macroaverage</td>
                <td>
                  <break/>
                </td>
                <td>96.76</td>
                <td>92.73</td>
                <td>94.57</td>
              </tr>
              <tr valign="top">
                <td>Microaverage</td>
                <td>
                  <break/>
                </td>
                <td>97.49</td>
                <td>95.99</td>
                <td>96.74</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we developed an IE system to extract information related to lung cancer staging from CT reports automatically. The experimental results indicate that the IE system can effectively extract the useful entities and relations using the NER and RC models and accurately obtain the answers to the questions about lung cancer staging using the PP module. The extracted information shows significant potential to support further research about accurate lung cancer clinical staging.</p>
        <p>Although the macro-F1 score of NER is only 80.97%, which seems insufficient to support RC and PP, the IE system still achieves satisfactory results. The main reason is that the PP module exploits the key characters in the extracted entities or only the presence of the entities to obtain the answers but does not need the complete entities. For example, the annotation of the sentence “右肺下叶基底段见软组织密度肿块” is [Location_B, Location_I, Location_I, Location_I, Location_I, Location_I, Location_I, O, Mass_B, Mass_I, Mass_I, Mass_I, Mass_I, Mass_I, Mass_I], but the NER result is [Location_B, Location_I, Location_I, Location_I, Location_I, O, Location_I, O, Mass_B, Mass_I, Mass_I, Mass_I, Mass_I, Mass_I, Mass_I], which means the Location entity extracted is merely “右肺下叶基.” However, this partial Location entity is correctly linked to the Mass entity “软组织密度肿块” with an “At” relation by the RC model, and the key characters “右” and “下” in the Location entity can support the following PP step. The high macro-F1 and micro-F1 of the inexact matching scheme indicate that most of the entities can be extracted completely or partially by the NER model. Furthermore, the extractions cover most of the key characters needed during the PP step.</p>
        <p>For the RC task, all four models achieve satisfactory performances. This is because the descriptions are similar in many sentences so that the models can easily learn these patterns. However, for the Related relation, none of the models achieves equally high performance. The main reason is that some types of entities like “Vertebral Body” and “Vessel” are rare and have diverse descriptions, making it difficult for these models to learn the corresponding patterns. The addition of RSC may make the descriptions more uniform so that the models may learn the patterns more easily.</p>
        <p>For the NER and RC tasks, the advanced pretrained BERT model achieves better performance compared to the conventional CNN and RNN methods, thus verifying the superiority of large language representation models for various NLP tasks.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Although the rule-based PP module can accurately obtain the answers to the defined questions by analyzing the extracted entities and relations, these hard-coded rules are difficult to maintain and update. Furthermore, for better use of clinical knowledge (eg, enlarged lymph nodes with a minimum diameter greater than 10 mm are often considered metastatic), we need to establish a more comprehensive knowledge base to analyze the extracted information. Ontology, as a formal representation of medical knowledge, has become the standard method to develop knowledge bases [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>]. In future, we can use the Web Ontology Language (OWL) [<xref ref-type="bibr" rid="ref51">51</xref>] to construct the knowledge graph and employ the Semantic Web Rule Language (SWRL) [<xref ref-type="bibr" rid="ref52">52</xref>] to develop the reasoning rules for lung cancer staging.</p>
        <p>In this study, we explored the feasibility of extracting information related to lung cancer staging from CT reports using an NER+RC+PP pipeline in a single hospital. When generalizing this approach to other hospitals, the entity and relation definitions as well as the annotation strategy can be important references for the same application, and the developed pipeline can also be reused. However, if researchers want to customize the entity types or relation types to suit their purpose or if the writing style of CT reports is significantly different from that in the reports that we used, fine-tuning of BERT using the newly annotated reports may be a possible way to obtain satisfactory generalization.</p>
      </sec>
      <sec>
        <title>Future Research</title>
        <p>In the current study, pathological staging was not applied as the gold standard to evaluate the correctness of the extracted results. This is mainly because in clinical practice, clinicians use not only CT but also PET, MRI, and other diagnostic modalities to stage patients. Therefore, it is insufficient to use only the information extracted from the CT report to stage the patients. In future, we plan to extract staging information from other examination reports and use this multisource information to verify the staging correctness from a more comprehensive perspective. Moreover, by combining various details such as laboratory tests, disease history, and radiomics data, we can employ advanced machine learning algorithms to develop clinical staging prediction models to further alleviate the large number of disagreements between clinical and pathological stages.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we developed an IE system to extract lung cancer staging information from CT reports automatically using NLP techniques. Experimental results obtained using real clinical data demonstrated that the IE system could effectively extract the relevant entities and relations using the NER and RC models. It could also accurately answer the staging questions using the rule-based PP module, thus proving the potential of this system for lung cancer staging verification and clinical staging prediction.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Parsed questions about lung cancer staging.</p>
        <media xlink:href="medinform_v9i7e27955_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 123 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Annotation guideline.</p>
        <media xlink:href="medinform_v9i7e27955_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 167 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Postprocessing rules.</p>
        <media xlink:href="medinform_v9i7e27955_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 101 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Evaluation metrics.</p>
        <media xlink:href="medinform_v9i7e27955_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 52 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Hyperparameters of the named entity recognition and relation classification models. Attention-Bi-LSTM-RSC: attention-based bidirectional long short-term memory-relation sign constraint; Bi-LSTM-CRF: bidirectional long short-term memory networks-conditional random field; BERT: bidirectional encoder representation from transformers; BERT-RSC: bidirectional encoder representation from transformers-relation sign constraint; ID-CNN-CRF: iterated dilated convolutional neural networks-conditional random field.</p>
        <media xlink:href="medinform_v9i7e27955_app5.pdf" xlink:title="PDF File  (Adobe PDF File), 115 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">Attention-Bi-LSTM</term>
          <def>
            <p>attention-based bidirectional long short-term memory networks</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">Bi-LSTM</term>
          <def>
            <p>bidirectional long short-term memory networks</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">Bi-LSTM-CRF</term>
          <def>
            <p>bidirectional long short-term memory networks-conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BERT</term>
          <def>
            <p>bidirectional encoder representation from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CLIP</term>
          <def>
            <p>Cancer of Liver Italian Program</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">CT</term>
          <def>
            <p>computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ID-CNN</term>
          <def>
            <p>iterated dilated convolutional neural networks</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ID-CNN-CRF</term>
          <def>
            <p>iterated dilated convolutional neural networks-conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">IE</term>
          <def>
            <p>information extraction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MRI</term>
          <def>
            <p>magnetic resonance imaging</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">OWL</term>
          <def>
            <p>Web Ontology Language</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">PAOP</term>
          <def>
            <p>pulmonary atelectasis/obstructive pneumonitis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">PET</term>
          <def>
            <p>positron emission tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">PP</term>
          <def>
            <p>postprocessing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">RC</term>
          <def>
            <p>relation classification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">RSC</term>
          <def>
            <p>relation sign constraint</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">SNOMED CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine Clinical Terms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb22">SWRL</term>
          <def>
            <p>Semantic Web Rule Language</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb23">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Key R&#38;D Program of China (grant 2018YFC0910700).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>DH, SL, XL, and NW conceptualized the study. SL acquired the clinical data. SL, YW, and HZ annotated the data. HZ, DH, and YW designed and implemented the algorithms and conducted the experiments. DH, HZ, YW, and SL analyzed the experimental results. DH wrote the manuscript with revision assistance from SL, XL, and NW. All authors have read and approved the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ferlay</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Soerjomataram</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Torre</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Jemal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Global cancer statistics 2018: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title>
          <source>CA Cancer J Clin</source>
          <year>2018</year>
          <month>11</month>
          <volume>68</volume>
          <issue>6</issue>
          <fpage>394</fpage>
          <lpage>424</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3322/caac.21492"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/caac.21492</pub-id>
          <pub-id pub-id-type="medline">30207593</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ettinger</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Aisner</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Akerley</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bauman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bharat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bruno</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chirieac</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>D'Amico</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dilling</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dobelbower</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gettinger</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Govindan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gubens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hennon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Horn</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lackner</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lanuti</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Leal</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Loo</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Martins</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Otterson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Reckamp</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Riely</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Schild</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shapiro</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Stevenson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Swanson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tauer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gregory</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>NCCN clinical practice guidelines in oncology</article-title>
          <source>Non-Small Cell Lung Cancer</source>
          <access-date>2019-09-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nccn.org/guidelines/guidelines-detail?category=1&#38;id=1450">https://www.nccn.org/guidelines/guidelines-detail?category=1&#38;id=1450</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Navani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tierney</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Stephens</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Burdett</surname>
              <given-names>S</given-names>
            </name>
            <collab>NSCLC Meta-analysis Collaborative Group</collab>
          </person-group>
          <article-title>The accuracy of clinical staging of stage I-IIIa non-small cell lung cancer: an analysis based on individual participant data</article-title>
          <source>Chest</source>
          <year>2019</year>
          <month>03</month>
          <volume>155</volume>
          <issue>3</issue>
          <fpage>502</fpage>
          <lpage>509</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0012-3692(18)32607-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.chest.2018.10.020</pub-id>
          <pub-id pub-id-type="medline">30391190</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(18)32607-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6435782</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heineman</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Berge</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Daniels</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Versteegh</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Marang-van de Mheen</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wouters</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Schreurs</surname>
              <given-names>WH</given-names>
            </name>
          </person-group>
          <article-title>The quality of staging non-small cell lung cancer in the Netherlands: data from the Dutch lung surgery audit</article-title>
          <source>Ann Thorac Surg</source>
          <year>2016</year>
          <month>11</month>
          <volume>102</volume>
          <issue>5</issue>
          <fpage>1622</fpage>
          <lpage>1629</lpage>
          <pub-id pub-id-type="doi">10.1016/j.athoracsur.2016.06.071</pub-id>
          <pub-id pub-id-type="medline">27665479</pub-id>
          <pub-id pub-id-type="pii">S0003-4975(16)30787-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kazerooni</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Baum</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Eapen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ettinger</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ferguson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Klippenstein</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lackner</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Leard</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lennes</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Massion</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mazzone</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Merritt</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Midthun</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Onaitis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pipavath</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Puri</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Reid</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rotter</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sachs</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sands</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schabath</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tanoue</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Travis</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Vachani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>NCCN clinical practice guidelines in oncology</article-title>
          <source>Lung Cancer Screening</source>
          <access-date>2019-09-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nccn.org/guidelines/guidelines-detail?category=2&#38;id=1441">https://www.nccn.org/guidelines/guidelines-detail?category=2&#38;id=1441</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yim</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yetisgen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>WP</given-names>
            </name>
            <name name-style="western">
              <surname>Kwan</surname>
              <given-names>SW</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing in oncology: a review</article-title>
          <source>JAMA Oncol</source>
          <year>2016</year>
          <month>06</month>
          <day>01</day>
          <volume>2</volume>
          <issue>6</issue>
          <fpage>797</fpage>
          <lpage>804</lpage>
          <pub-id pub-id-type="doi">10.1001/jamaoncol.2016.0213</pub-id>
          <pub-id pub-id-type="medline">27124593</pub-id>
          <pub-id pub-id-type="pii">2517402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhalishahi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rinaldi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>04</month>
          <day>27</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12239</pub-id>
          <pub-id pub-id-type="medline">31066697</pub-id>
          <pub-id pub-id-type="pii">v7i2e12239</pub-id>
          <pub-id pub-id-type="pmcid">PMC6528438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Detterbeck</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Boffa</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Tanoue</surname>
              <given-names>LT</given-names>
            </name>
          </person-group>
          <article-title>The eighth edition lung cancer stage classification</article-title>
          <source>Chest</source>
          <year>2017</year>
          <month>01</month>
          <volume>151</volume>
          <issue>1</issue>
          <fpage>193</fpage>
          <lpage>203</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chest.2016.10.010</pub-id>
          <pub-id pub-id-type="medline">27780786</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(16)60780-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Strubell</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Verga</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Belanger</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Fast and accurate entity recognition with iterated dilated convolutions</article-title>
          <source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2017</year>
          <conf-name>Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>Sept 07-11</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>2670</fpage>
          <lpage>2680</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/D17-1283</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Bidirectional LSTM-CRF models for sequence tagging</article-title>
          <source>ArXiv.</source>
          <comment>Preprint posted online on Aug 9, 2015
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1508.01991v1"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>ArXiv.</source>
          <comment>Preprint posted online on Oct 11, 2018
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1810.04805"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Datta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bernstam</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A frame semantic overview of NLP-based information extraction for cancer-related EHR notes</article-title>
          <source>J Biomed Inform</source>
          <year>2019</year>
          <month>10</month>
          <day>04</day>
          <volume>100</volume>
          <fpage>103301</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(19)30221-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2019.103301</pub-id>
          <pub-id pub-id-type="medline">31589927</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(19)30221-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Lawley</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Bowman</surname>
              <given-names>RV</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Duhig</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Colquist</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Symbolic rule-based classification of lung cancer stages from free-text pathology reports</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <month>07</month>
          <volume>17</volume>
          <issue>4</issue>
          <fpage>440</fpage>
          <lpage>445</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/jamia/article/17/4/440/866997"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2010.003707</pub-id>
          <pub-id pub-id-type="medline">20595312</pub-id>
          <pub-id pub-id-type="pii">17/4/440</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995652</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Warner</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Neuss</surname>
              <given-names>MN</given-names>
            </name>
          </person-group>
          <article-title>ReCAP: feasibility and accuracy of extracting cancer stage information from narrative electronic health record data</article-title>
          <source>J Oncol Pract</source>
          <year>2016</year>
          <month>02</month>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>157</fpage>
          <lpage>158</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ascopubs.org/doi/10.1200/JOP.2015.004622"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/JOP.2015.004622</pub-id>
          <pub-id pub-id-type="medline">26306621</pub-id>
          <pub-id pub-id-type="pii">JOP.2015.004622</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schroeck</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>Lynch</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>MacKenzie</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Seigne</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Goodney</surname>
              <given-names>PP</given-names>
            </name>
            <name name-style="western">
              <surname>Sirovich</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Extent of risk-aligned surveillance for cancer recurrence among patients with early-stage bladder cancer</article-title>
          <source>JAMA Netw Open</source>
          <year>2018</year>
          <month>09</month>
          <volume>1</volume>
          <issue>5</issue>
          <fpage>e183442</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2018.3442"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2018.3442</pub-id>
          <pub-id pub-id-type="medline">30465041</pub-id>
          <pub-id pub-id-type="pmcid">PMC6241521</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cary</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Church</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Eckert</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ouyang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Haggstrom</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Development of a novel algorithm to identify staging and lines of therapy for bladder cancer</article-title>
          <source>J Clin Oncol</source>
          <year>2017</year>
          <month>05</month>
          <day>20</day>
          <volume>35</volume>
          <issue>15_suppl</issue>
          <fpage>e18235</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ascopubs.org/doi/abs/10.1200/JCO.2017.35.15_suppl.e18235"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/jco.2017.35.15_suppl.e18235</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schroeck</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lynch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Seigne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Goodney</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sirovich</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>MP44-01 a national study of risk-aligned surveillance practice for non-muscle invasive bladder cancer</article-title>
          <source>J Urol</source>
          <year>2018</year>
          <month>04</month>
          <volume>199</volume>
          <issue>4S</issue>
          <fpage>e587</fpage>
          <pub-id pub-id-type="doi">10.1016/j.juro.2018.02.1420</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>AAlAbdulsalam</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Garvin</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Redd</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Sweeny</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Meystre</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Automated extraction and classification of cancer stage mentions from unstructured text fields in a central cancer registry</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2018</year>
          <month>05</month>
          <volume>2017</volume>
          <fpage>16</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29888032"/>
          </comment>
          <pub-id pub-id-type="medline">29888032</pub-id>
          <pub-id pub-id-type="pmcid">PMC5961766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nunes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Dalvi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Seeger</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Abstract P5-08-20: a real-world evidence study to define the prevalence of endocrine therapy-naïve hormone receptor-positive locally advanced or metastatic breast cancer in the US</article-title>
          <source>Cancer Res</source>
          <year>2017</year>
          <month>02</month>
          <volume>77</volume>
          <issue>4 Supplement</issue>
          <fpage>P5-08-20</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancerres.aacrjournals.org/content/77/4_Supplement/P5-08-20"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/1538-7445.SABCS16-P5-08-20</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Giri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Levinson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Keene</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Holman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lovett</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Stansel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Snyder</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fromal</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cozzi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Khabele</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Beeghly-Fadiel</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Abstract 4229: preliminary results from the pharmacogenetics ovarian cancer knowledge to individualize treatment (POCKIT) study</article-title>
          <source>Cancer Res</source>
          <year>2018</year>
          <month>07</month>
          <volume>78</volume>
          <issue>13 Supplement</issue>
          <fpage>4229</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancerres.aacrjournals.org/content/78/13_Supplement/4229"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/1538-7445.AM2018-4229</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Tseytlin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Finan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Castine</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Medvedeva</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hochheiser</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chavan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobson</surname>
              <given-names>RS</given-names>
            </name>
          </person-group>
          <article-title>DeepPhe: a natural language processing system for extracting cancer phenotypes from clinical records</article-title>
          <source>Cancer Res</source>
          <year>2017</year>
          <month>11</month>
          <day>01</day>
          <volume>77</volume>
          <issue>21</issue>
          <fpage>e115</fpage>
          <lpage>e118</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://cancerres.aacrjournals.org/cgi/pmidlookup?view=long&#38;pmid=29092954"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/0008-5472.CAN-17-0615</pub-id>
          <pub-id pub-id-type="medline">29092954</pub-id>
          <pub-id pub-id-type="pii">77/21/e115</pub-id>
          <pub-id pub-id-type="pmcid">PMC5690492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ping</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Information extraction for tracking liver cancer patients' statuses: from mixture of clinical narrative report types</article-title>
          <source>Telemed J E Health</source>
          <year>2013</year>
          <month>09</month>
          <volume>19</volume>
          <issue>9</issue>
          <fpage>704</fpage>
          <lpage>710</lpage>
          <pub-id pub-id-type="doi">10.1089/tmj.2012.0241</pub-id>
          <pub-id pub-id-type="medline">23869395</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yim</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Denman</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kwan</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Yetisgen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Tumor information extraction in radiology reports for hepatocellular carcinoma patients</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2016</year>
          <month>07</month>
          <volume>2016</volume>
          <fpage>455</fpage>
          <lpage>464</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27570686"/>
          </comment>
          <pub-id pub-id-type="medline">27570686</pub-id>
          <pub-id pub-id-type="pmcid">PMC5001784</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Using natural language processing to extract clinically useful information from Chinese electronic medical records</article-title>
          <source>Int J Med Inform</source>
          <year>2019</year>
          <month>04</month>
          <volume>124</volume>
          <fpage>6</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2019.01.004</pub-id>
          <pub-id pub-id-type="medline">30784428</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(18)30594-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bozkurt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lipson</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Senol</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Automatic abstraction of imaging observations with their characteristics from mammography reports</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>04</month>
          <volume>22</volume>
          <issue>e1</issue>
          <fpage>e81</fpage>
          <lpage>e92</lpage>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2014-003009</pub-id>
          <pub-id pub-id-type="medline">25352567</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2014-003009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bozkurt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gimenez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Burnside</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Gulkesen</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Using automatically extracted information from mammography reports for decision-support</article-title>
          <source>J Biomed Inform</source>
          <year>2016</year>
          <month>08</month>
          <volume>62</volume>
          <fpage>224</fpage>
          <lpage>231</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(16)30055-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2016.07.001</pub-id>
          <pub-id pub-id-type="medline">27388877</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(16)30055-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5108519</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jauregi Unanue</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zare Borzeshi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Piccardi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Recurrent neural networks with specialized word embeddings for health-domain named-entity recognition</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>12</month>
          <volume>76</volume>
          <fpage>102</fpage>
          <lpage>109</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30244-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.11.007</pub-id>
          <pub-id pub-id-type="medline">29146561</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30244-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Relation classification via recurrent neural network</article-title>
          <source>ArXiv.</source>
          <comment>Preprint posted online on Aug 5, 2015
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1508.01006"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Clinical named entity recognition from Chinese electronic health records via machine learning methods</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>12</month>
          <day>17</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e50</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2018/4/e50/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.9965</pub-id>
          <pub-id pub-id-type="medline">30559093</pub-id>
          <pub-id pub-id-type="pii">v6i4e50</pub-id>
          <pub-id pub-id-type="pmcid">PMC6315256</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Entity recognition from clinical texts via recurrent neural network</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>07</month>
          <day>05</day>
          <volume>17</volume>
          <issue>Suppl 2</issue>
          <fpage>67</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0468-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0468-7</pub-id>
          <pub-id pub-id-type="medline">28699566</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0468-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5506598</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Relation classification via convolutional deep neural network</article-title>
          <year>2014</year>
          <conf-name>The 25th International Conference on Computational Linguistics</conf-name>
          <conf-date>August 23-29, 2014</conf-date>
          <conf-loc>Dublin, Ireland</conf-loc>
          <publisher-loc>Dublin, Ireland</publisher-loc>
          <publisher-name>Dublin City University and Association for Computational Linguistics</publisher-name>
          <fpage>2335</fpage>
          <lpage>2344</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Attention-based bidirectional long short-term memory networks for relation classification</article-title>
          <year>2016</year>
          <conf-name>The 54th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>August 7-12, 2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <publisher-loc>Berlin, Germany</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>207</fpage>
          <lpage>212</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P16-2034/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p16-2034</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Recurrent neural networks for classifying relations in clinical notes</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>08</month>
          <volume>72</volume>
          <fpage>85</fpage>
          <lpage>95</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30162-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.07.006</pub-id>
          <pub-id pub-id-type="medline">28694119</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30162-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC6657689</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Si</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A frame-based NLP system for cancer-related information extraction</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2018</year>
          <volume>2018</volume>
          <fpage>1524</fpage>
          <lpage>1533</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30815198"/>
          </comment>
          <pub-id pub-id-type="medline">30815198</pub-id>
          <pub-id pub-id-type="pmcid">PMC6371330</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>JX</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Christian</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Fearn</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Tourassi</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Ramanthan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Hierarchical attention networks for information extraction from cancer pathology reports</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>03</month>
          <day>01</day>
          <volume>25</volume>
          <issue>3</issue>
          <fpage>321</fpage>
          <lpage>330</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29155996"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx131</pub-id>
          <pub-id pub-id-type="medline">29155996</pub-id>
          <pub-id pub-id-type="pii">4636780</pub-id>
          <pub-id pub-id-type="pmcid">PMC7282502</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rothschild</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Agreement, the f-measure, and reliability in information retrieval</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2005</year>
          <month>05</month>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>296</fpage>
          <lpage>298</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=15684123"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M1733</pub-id>
          <pub-id pub-id-type="medline">15684123</pub-id>
          <pub-id pub-id-type="pii">M1733</pub-id>
          <pub-id pub-id-type="pmcid">PMC1090460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stenetorp</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pyysalo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Topić</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ohta</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>brat: a web-based tool for NLP-assisted text annotation</article-title>
          <source>Proceedings of the 13th Conference of the European Chapter of the Association for Computational Linguistics</source>
          <year>2012</year>
          <conf-name>The 13th Conference of the European Chapter of the Association for Computational Linguistics</conf-name>
          <conf-date>April 23-27, 2012</conf-date>
          <conf-loc>Avignon, France</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>102</fpage>
          <lpage>107</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <source>ArXiv.</source>
          <comment>Preprint posted online on Jan 16, 2013
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1301.3781"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>jieba</article-title>
          <source>Jieba Chinese word segmentation module</source>
          <access-date>2021-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/fxsjy/jieba">https://github.com/fxsjy/jieba</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A comprehensive study of named entity recognition in Chinese clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <month>09</month>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>808</fpage>
          <lpage>814</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24347408"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-002381</pub-id>
          <pub-id pub-id-type="medline">24347408</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-002381</pub-id>
          <pub-id pub-id-type="pmcid">PMC4147609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Extracting important information from Chinese Operation Notes with natural language processing methods</article-title>
          <source>J Biomed Inform</source>
          <year>2014</year>
          <month>04</month>
          <volume>48</volume>
          <fpage>130</fpage>
          <lpage>136</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00006-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.12.017</pub-id>
          <pub-id pub-id-type="medline">24486562</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00006-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical information extraction applications: a literature review</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>01</month>
          <volume>77</volume>
          <fpage>34</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30256-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.11.011</pub-id>
          <pub-id pub-id-type="medline">29162496</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30256-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5771858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hruby</surname>
              <given-names>GW</given-names>
            </name>
            <name name-style="western">
              <surname>Rusanov</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>EliIE: an open-source information extraction system for clinical trial eligibility criteria</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>11</month>
          <day>01</day>
          <volume>24</volume>
          <issue>6</issue>
          <fpage>1062</fpage>
          <lpage>1071</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28379377"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx019</pub-id>
          <pub-id pub-id-type="medline">28379377</pub-id>
          <pub-id pub-id-type="pii">3098256</pub-id>
          <pub-id pub-id-type="pmcid">PMC6259668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stenner</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Waitman</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>MedEx: a medication information extraction system for clinical narratives</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <month>01</month>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=20064797"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M3378</pub-id>
          <pub-id pub-id-type="medline">20064797</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Koltun</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Multi-scale context aggregation by dilated convolutions</article-title>
          <source>arXiv e-prints</source>
          <year>2015</year>
          <month>11</month>
          <day>23</day>
          <fpage>07122</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1511.07122"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>Ł</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>ArXiv.</source>
          <comment>Preprint posted online on June 12, 2017
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1706.03762"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Exploring various knowledge in relation extraction</article-title>
          <source>Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics</source>
          <year>2005</year>
          <month>06</month>
          <conf-name>The 43rd Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>June 25-30, 2005</conf-date>
          <conf-loc>Ann Arbor, Michigan</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>427</fpage>
          <lpage>434</lpage>
          <pub-id pub-id-type="doi">10.3115/1219840.1219893</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mintz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bills</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Snow</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jurafsky</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Distant supervision for relation extraction without labeled data</article-title>
          <source>Proceedings of the Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP</source>
          <year>2009</year>
          <month>08</month>
          <conf-name>The Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP</conf-name>
          <conf-date>Aug 7-12, 2009</conf-date>
          <conf-loc>Suntec, Singapore</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>1003</fpage>
          <lpage>1011</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bau</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A recommendation system based on domain ontology and SWRL for anti-diabetic drugs selection</article-title>
          <source>Expert Syst Appl</source>
          <year>2012</year>
          <month>03</month>
          <volume>39</volume>
          <issue>4</issue>
          <fpage>3995</fpage>
          <lpage>4006</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sciencedirect.com/science/article/abs/pii/S0957417411013704"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2011.09.061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Design and development of a sharable clinical decision support system based on a semantic web service framework</article-title>
          <source>J Med Syst</source>
          <year>2016</year>
          <month>05</month>
          <volume>40</volume>
          <issue>5</issue>
          <fpage>118</fpage>
          <pub-id pub-id-type="doi">10.1007/s10916-016-0472-y</pub-id>
          <pub-id pub-id-type="medline">27002818</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10916-016-0472-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <source>OWL 2 Web Ontology Language Document Overview (Second Edition)</source>
          <access-date>2021-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.w3.org/TR/owl2-overview/">https://www.w3.org/TR/owl2-overview/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <source>SWRL: A Semantic Web Rule Language Combining OWL and RuleML</source>
          <access-date>2021-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.w3.org/Submission/SWRL/">https://www.w3.org/Submission/SWRL/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
