<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i5e17643</article-id>
      <article-id pub-id-type="pmid">32348257</article-id>
      <article-id pub-id-type="doi">10.2196/17643</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>A Graph Convolutional Network–Based Method for Chemical-Protein Interaction Extraction: Algorithm Development</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Tang</surname>
            <given-names>Buzhou</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hua</surname>
            <given-names>My</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Liyuan</surname>
            <given-names>Tao</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Erniu</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7233-8869</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Fan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3591-9394</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Zhihao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>College of Computer Science and Technology</institution>
            <institution>Dalian University of Technology</institution>
            <addr-line>No 2, Linggong Road</addr-line>
            <addr-line>Ganjingzi District</addr-line>
            <addr-line>Dalian, </addr-line>
            <country>China</country>
            <phone>86 13190114398</phone>
            <email>yangzh@dlut.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6186-2024</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Lei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8420-6860</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Yin</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2522-0969</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>Hongfei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0872-7688</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Jian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4656-7446</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>College of Computer Science and Technology</institution>
        <institution>Dalian University of Technology</institution>
        <addr-line>Dalian</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Beijing Institute of Health Administration and Medical Information</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Zhihao Yang <email>yangzh@dlut.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>19</day>
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>5</issue>
      <elocation-id>e17643</elocation-id>
      <history>
        <date date-type="received">
          <day>30</day>
          <month>12</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>2</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>14</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>3</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Erniu Wang, Fan Wang, Zhihao Yang, Lei Wang, Yin Zhang, Hongfei Lin, Jian Wang. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 19.05.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/5/e17643/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Extracting the interactions between chemicals and proteins from the biomedical literature is important for many biomedical tasks such as drug discovery, medicine precision, and knowledge graph construction. Several computational methods have been proposed for automatic chemical-protein interaction (CPI) extraction. However, the majority of these proposed models cannot effectively learn semantic and syntactic information from complex sentences in biomedical texts.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>To relieve this problem, we propose a method to effectively encode syntactic information from long text for CPI extraction.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Since syntactic information can be captured from dependency graphs, graph convolutional networks (GCNs) have recently drawn increasing attention in natural language processing. To investigate the performance of a GCN on CPI extraction, this paper proposes a novel GCN-based model. The model can effectively capture sequential information and long-range syntactic relations between words by using the dependency structure of input sentences.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We evaluated our model on the ChemProt corpus released by BioCreative VI; it achieved an F-score of 65.17%, which is 1.07% higher than that of the state-of-the-art system proposed by Peng et al. As indicated by the significance test (<italic>P</italic>&#60;.001), the improvement is significant. It indicates that our model is effective in extracting CPIs. The GCN-based model can better capture the semantic and syntactic information of the sentence compared to other models, therefore alleviating the problems associated with the complexity of biomedical literature.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our model can obtain more information from the dependency graph than previously proposed models. Experimental results suggest that it is competitive to state-of-the-art methods and significantly outperforms other methods on the ChemProt corpus, which is the benchmark data set for CPI extraction.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>chemical-protein interaction</kwd>
        <kwd>graph convolutional network</kwd>
        <kwd>long-range syntactic</kwd>
        <kwd>dependency structure</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Biomedical literature has grown significantly with the development of biomedical technology, which contains a large amount of valuable chemical-protein interactions (CPIs). CPI extraction plays an important role in various biomedical tasks such as drug discovery, medicine precision, and knowledge graph construction [<xref ref-type="bibr" rid="ref1">1</xref>]. With the rapidly increasing volume of biomedical literature, it becomes time-and-resource–consuming to extract CPIs from biomedical literature manually. There are some computational methods that have been successfully proposed for automatic biomedical relation extraction [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. However, most previous studies focused on the extraction of drug-drug interactions, protein-protein interactions, and chemical-disease interactions; only a few attempts were developed to extract CPIs [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>The BioCreative VI ChemProt shared task [<xref ref-type="bibr" rid="ref8">8</xref>] created the ChemProt data set, which is used in the development of CPI extraction methods. The current CPI extraction systems can be generally divided into two categories: the traditional machine learning–based methods and the neural network–based methods. The traditional machine learning–based methods conventionally train a CPI extractor by handcrafted features [<xref ref-type="bibr" rid="ref7">7</xref>]. The neural network–based methods can automatically learn powerful features to train a classifier, and therefore, have become a promising method for CPI extraction.</p>
      <p>Mehryary et al [<xref ref-type="bibr" rid="ref9">9</xref>] combined a support vector machine (SVM) and long short-term memory (LSTM) to extract CPIs and achieved a high F-score by a rich set of features. Warikoo et al [<xref ref-type="bibr" rid="ref10">10</xref>] also exploited a set of linguistic features to train a tree kernel classifier to obtain CPIs from biomedical literature. Generally, these methods depend heavily on feature engineering. Recently, attention mechanisms have been successfully used in many natural language processing tasks, and some works have employed it in CPI extraction. Liu et al [<xref ref-type="bibr" rid="ref11">11</xref>] aggregated an attention mechanism and gated recurrent units (GRU) to extend the LSTM model. Verga et al [<xref ref-type="bibr" rid="ref12">12</xref>] encoded pair-wise predictions over entire abstracts by synthesizing self-attention and convolutions. Corbett and Boyle [<xref ref-type="bibr" rid="ref13">13</xref>] employed multiple LSTM layers with unlabeled data to extract relations amongst the ChemProt corpus and achieved good performance. Peng et al [<xref ref-type="bibr" rid="ref14">14</xref>] applied an ensemble system to extract CPIs, which consists of three individual models, including SVM, convolutional neural network (CNN), and bi-directional long short-term memory (Bi-LSTM) modules. The system achieved an F-score of 64.1% and won the top rank in the BioCreative VI ChemProt shared task.</p>
      <p>However, most of the proposed methods only utilize the sequential information of sentences; syntactic information has not been carefully studied yet. Due to the presence of complex sentences in biomedical literature, it is difficult to effectively learn the semantic and syntactic information for some neural network–based models (eg, CNN [<xref ref-type="bibr" rid="ref15">15</xref>], LSTM [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref16">16</xref>], and GRU [<xref ref-type="bibr" rid="ref17">17</xref>]). To address this problem, we apply a graph convolutional network (GCN) [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>] for CPI extraction. The GCN can exploit dependency structure and capture long-range syntactic relations of input sentences. Therefore, it is more effective and precise than other modules for CPI extraction.</p>
      <p>Additionally, sentences in the biomedical literature are generally lengthy, so there is a considerable amount of irrelevant words. For example, in the sentence “Dasatinib (BMS-354825) is a novel orally bioavailable SRC/ABL inhibitor that has activity against multiple imatinib-resistant BCR-ABL isoforms in vitro that is presently showing considerable promise in early-phase clinical trials of chronic myeloid leukemia (CML),” “Dasatinib (BMS-354825) is a novel orally bioavailable SRC/ABL inhibitor” can already express the inhibitory relationship between the entities “Dasatinib” and “SRC.” Other words, which may affect the performance of the relation extractor, are irrelevant. Inspired by Zhang et al [<xref ref-type="bibr" rid="ref20">20</xref>], we apply a path-centric pruning strategy to incorporate relevant information while maximally reducing the influence of noisy words in long sentences. This strategy retains tokens that are up to distance <italic>N</italic> away from the dependency path in the lowest common ancestor (LCA) subtree [<xref ref-type="bibr" rid="ref21">21</xref>]. The experimental results prove that this strategy can improve the robustness of our model. The model achieves the best balance between noisy words and relevant words when <italic>N</italic> is set to 2.</p>
      <p>A single GCN model usually depends highly on correct parse trees to extract crucial information from sentences, while existing parsing algorithms produce imperfect trees in many cases. To further improve the robustness of our model, we apply a Bi-LSTM network to obtain contextual information about word order or disambiguation. The compound model can better leverage local word patterns regardless of parsing quality.</p>
      <p>In summary, we propose a GCN-based model in this paper to extract CPIs. We evaluated our model on the ChemProt corpus, which is the benchmark data set for CPI extraction. To the best of our knowledge, this is the first study to use a GCN encoding syntactic graph for CPI extraction.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>The overall architecture of our model is presented in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Our model contains three parts: the Bi-LSTM layer, the GCN layer, and the classification layer. In the model, a Bi-LSTM layer is applied first to capture local word patterns and output the representation of each word within the whole sentence. Subsequently, the contextualized representation and the dependency graph (with two directly attached dependencies) of input sentences are fed into the GCN layer to integrate dependency information into word representations. After that, a max-pooling layer is applied to generate the representation of the sentence and two target entities from word representations. Finally, these representations are concatenated and fed into a multilayer perceptron (MLP) for softmax classification. In the following section, we will introduce our model in detail.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The overall architecture of our model. Bi-LSTM: bi-directional long short-term memory; GCN: graph convolutional network; POS: part-of-speech; MLP: multilayer perceptron; sub: subject; obj: object; hc: representation of chemical; hs: representation of sentence; hp: representation of protein; f: max-pooling function.</p>
          </caption>
          <graphic xlink:href="medinform_v8i5e17643_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>The Bi-LSTM Layer</title>
        <p>We adopt a Bi-LSTM layer to capture contextual information about word order and reduce the impact of parsing errors in our model. The Bi-LSTM layer is applied on the whole sentence to learn the representation of each word. Bi-LSTM can capture more comprehensive features by dealing with the input sequence from forward and backward directions, compared with unidirectional LSTM; it is the combination of the forward LSTM and backward LSTM.</p>
        <p>In the ChemProt corpus, some entities contain multiple types of words, especially the relation type “PART_OF,” which means one entity is part of another type of entity within a relation entity pair. For example, “thiazide-sensitive sodium-chloride cotransporter” is a gene entity, and “sodium-chloride” is a chemical entity. To reduce this interference, we apply prior knowledge of the entity type as a feature to improve CPI extraction.</p>
        <p>The input of the Bi-LSTM layer consists of three parts, including word embedding, part-of-speech (POS) embedding and entity type embedding. Given a sentence <italic>S = {w<sub>1</sub>,w<sub>2</sub>,…,w<sub>n</sub>}</italic>, the POS sequence <italic>P = {p<sub>1</sub>,p<sub>2</sub>,…,p<sub>n</sub>}</italic> can be obtained by the Stanford CoreNLP toolkit [<xref ref-type="bibr" rid="ref22">22</xref>], where <italic>w<sub>i</sub></italic> is the i-th word in a sentence and <italic>p<sub>i</sub></italic> is its POS. We obtain the sequence of entity types <italic>T = {t<sub>1</sub>,t<sub>2</sub>,…,t<sub>n</sub>}</italic> through the index information of the entity pairs in a sentence. We tagged entity tokens “chemical” or “gen” and other words “O.” The word embedding is initialized with pretrained word embedding, which is obtained by FastText [<xref ref-type="bibr" rid="ref23">23</xref>]. POS and entity type embedding are initialized randomly. The input of the model is denoted as follows:</p>
        <graphic xlink:href="medinform_v8i5e17643_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>For each token <italic>x<sub>i</sub></italic>, the forward LSTM and backward LSTM consider the contextual information before and after it, respectively. The final output is the concatenation of the two directions. The Bi-LSTM calculation process is presented as follows:</p>
        <graphic xlink:href="medinform_v8i5e17643_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>where 
        <inline-graphic xlink:href="medinform_v8i5e17643_fig7.png" xlink:type="simple" mimetype="image"/> 
        and 
        <inline-graphic xlink:href="medinform_v8i5e17643_fig8.png" xlink:type="simple" mimetype="image"/>
        denote the hidden states of the forward and backward LSTM of <italic>x<sub>i</sub></italic>, respectively. 
       <inline-graphic xlink:href="medinform_v8i5e17643_fig9.png" xlink:type="simple" mimetype="image"/>
        denotes concatenation operation.</p>
      </sec>
      <sec>
        <title>The GCN Layer</title>
        <p>GCNs can learn a state embedding, which contains the information of a neighborhood for each node in a graph. It has been proven that dependency-based models are very effective in relation extraction by capturing long-range syntactic relations [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. In our model, we apply a GCN to improve the performance of CPI extraction by utilizing the dependency parse trees of the input sentences. In order to reduce the influence of noisy words in long sentences, we further apply a pruning strategy on the dependency trees to remove irrelevant words while maximally keeping crucial content.</p>
        <p>Given a sentence, we first apply the Stanford CoreNLP toolkit to get its dependency tree, which is considered as an undirected graph. Then, we apply a path-centric pruning strategy and retain two directly attached words around the shortest path at the LCA of the two entities [<xref ref-type="bibr" rid="ref20">20</xref>]. After that, we convert the subgraph into an adjacency matrix A. If there is a dependency relation between node <italic>i</italic> and <italic>j</italic>, <italic>A<sub>ij</sub></italic> is assigned a value of 1. Finally, we apply a GCN over the output of Bi-LSTM and adjacency matrix A to get an updated hidden representation of <italic>h<sub>i</sub></italic>. This can be represented as shown in formula 5. In an L-layer GCN, if we use 
        <inline-graphic xlink:href="medinform_v8i5e17643_fig10.png" xlink:type="simple" mimetype="image"/> 
        as the input vector and 
        <inline-graphic xlink:href="medinform_v8i5e17643_fig11.png" xlink:type="simple" mimetype="image"/> 
        as the output vector for node <italic>i</italic> at the l-th layer, the graph convolution operation of the l-th layer can be represented as shown in formula 6.</p>
        <graphic xlink:href="medinform_v8i5e17643_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>where <italic>W<sup>(j)</sup></italic> and <italic>W<sup>(l)</sup></italic> are weight linear transformations, <italic>b<sup>(j)</sup></italic> and <italic>b<sup>(l)</sup></italic> are bias terms, and <italic>f</italic> is a nonlinear function (eg, a rectified linear unit [ReLU]). We could obtain the hidden representation of each token directly influenced by its neighbors no more than <italic>L</italic> edges apart in the dependency trees after applying an L-layer GCN over word vectors. To avoid a sentence representation favoring high-degree nodes regardless of the information carried in the node and to transfer information in 
        <inline-graphic xlink:href="medinform_v8i5e17643_fig10.png" xlink:type="simple" mimetype="image"/> to 
        <inline-graphic xlink:href="medinform_v8i5e17643_fig11.png" xlink:type="simple" mimetype="image"/>, we normalized the activations in the graph convolution before feeding it through a nonlinearity, and added self-loops to each node in the graph:</p>
        <graphic xlink:href="medinform_v8i5e17643_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>where 
        <inline-graphic xlink:href="medinform_v8i5e17643_fig14.png" xlink:type="simple" mimetype="image"/>
        . <italic>I</italic> is the <italic>n</italic> × <italic>n</italic> identity matrix, and 
        <inline-graphic xlink:href="medinform_v8i5e17643_fig15.png" xlink:type="simple" mimetype="image"/>
        is the degree of token <italic>i</italic> in the resulting graph.</p>
      </sec>
      <sec>
        <title>The Output and Classification Layer</title>
        <p>The CPI extraction can be regarded as a classification problem. Given a sentence <italic>S = {w<sub>1</sub>,w<sub>2,</sub>...,w<sub>n</sub>}</italic> where <italic>w<sub>i</sub></italic> is the <italic>i</italic>-th token, let <italic>S<sub>c</sub> = {w<sub>c1</sub>,w<sub>c2</sub>,...,w<sub>cn</sub>}</italic> and <italic>S<sub>p</sub> = {w<sub>p1</sub>,w<sub>p2</sub>,...,w<sub>pn</sub>}</italic> denote chemical sequence and protein sequence, respectively. The goal of CPI extraction is to predict the relation <italic>r</italic> &#8712; <italic>R</italic> that holds between the chemical <italic>S<sub>c</sub></italic> and gene <italic>S<sub>p</sub></italic>; otherwise, “no relation” is declared. After the Bi-LSTM and GCN layers, we can obtain the hidden representation of each token, which is influenced by not only local word patterns but also long-range words. To utilize these word representations for relation extraction, we mapped from <italic>h<sup>(L)</sup></italic> (<italic>n</italic> output vectors) to the sentence vector <italic>h<sub>sent</sub></italic>. The information close to entity tokens in the dependency trees is generally important in relation classification. Therefore, we also apply a max-pooling function to obtain entity pair representations <italic>h<sub>c</sub></italic> and <italic>h<sub>p</sub></italic> from <italic>h<sup>(L)</sup></italic> as follows:</p>
        <graphic xlink:href="medinform_v8i5e17643_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>where 
                <inline-graphic xlink:href="medinform_v8i5e17643_fig17.png" xlink:type="simple" mimetype="image"/> 
        denotes the output after L-layer GCN, and <italic>f</italic> denotes a max-pooling function.</p>
        <p>Then, we connect sentence representation with entity representation [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>] as a new representation, and feed it into a feed-forward neural network (FFNN) inspired by relational reasoning works:</p>
        <graphic xlink:href="medinform_v8i5e17643_fig18.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>Finally, we apply a linear layer followed by a softmax operation over the final representation <italic>h<sub>final</sub></italic> to obtain a probability distribution over chemical-protein relations and the computation is shown as follows:</p>
        <graphic xlink:href="medinform_v8i5e17643_fig19.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>where <italic>W<sub>r</sub></italic> and <italic>b<sub>r</sub></italic> are trainable parameters, and <italic>r</italic> is relation type.</p>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <p>In experiments, the Micro-average F-score is applied to evaluate the performance of our model, which is a harmonic mean of <italic>P</italic> and <italic>R</italic>, where <italic>P</italic> denotes precision and <italic>R</italic> denotes recall:</p>
        <graphic xlink:href="medinform_v8i5e17643_fig20.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p><italic>TP</italic>, <italic>FN</italic>, and <italic>FP</italic> denote true positive, false negative, and false positive, respectively.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Retrieval and Preprocessing</title>
        <p>CPI extraction aims to classify whether a semantic relation holds between the chemical and protein entity pairs within a sentence or document. The BioCreative VI ChemProt task delivered the corpus as a manually annotated CPI data set that consists of training, development, and test sets. Each set includes the abstracts, entities, and relations files. <xref rid="figure2" ref-type="fig">Figure 2</xref> provides an example of the three files from the ChemProt training set.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Examples of the ChemProt corpus. CPI: chemical-protein interaction.</p>
          </caption>
          <graphic xlink:href="medinform_v8i5e17643_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The abstracts file provides the article identifier, title, and abstract document for each article. The entities file consists of the PubMed Unique Identifier (PMID), entity number, type of entity mentions, start and end character offset, and text string of entity mention. The relations file is composed of the PMID, CPI relation class, evaluation type, and CPI relation and interactor arguments. In the ChemProt corpus, there are 10 relation classes, and each relation class includes one or multiple relation types (<xref ref-type="table" rid="table1">Table 1</xref>). Although there are 10 relation classes in the ChemProt corpus, only five are used for evaluation purposes (ie, CPR:3, CPR:4, CPR:5, CPR:6, and CPR:9). <xref ref-type="table" rid="table2">Table 2</xref> shows the statistics of the ChemProt corpus.</p>
        <p>The original corpus consists of PubMed abstracts from biomedical literature in which more than 98% of relational entity pairs occur within a sentence [<xref ref-type="bibr" rid="ref8">8</xref>]. Therefore, we neglected the cross-sentence entity pairs and conducted experiments at the sentence level. For CPI extraction, we took some preprocessing steps on the original corpus. First, we split abstracts into sentences and only retained the sentences that contained the relational entity pairs. Then, we reassigned the training set and developing set with a ratio of 9:1. Finally, we replaced each digit string that was not an entity substring with a particular “num” tag.</p>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> gives two illustrative examples of CPI extraction. In the first example, the sentence “Alprenolol and BAAM also caused surmountable antagonism of isoprenaline responses, and this beta 1-adrenoceptor antagonism was slowly reversible.” contains a relational entity pair. To accurately extract the CPI, we need to first detect the chemical entity “Alprenolol” and protein entity “beta 1-adrenoceptor,” and then classify the interaction as the CPR:6 class. The second example is a long and complex sentence. It is more difficult for the relation classifier to extract the interaction between the chemical and protein entities. Our model aims to predict the interactions, and the output is the relation type of chemical-protein entity pairs as shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The chemical-protein relation (CPR) groups.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="270"/>
            <col width="630"/>
            <thead>
              <tr valign="top">
                <td>Group</td>
                <td>Evaluated in the BioCreative VI ChemProt shared task?</td>
                <td>ChemProt relations</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>CPR:1</td>
                <td>No</td>
                <td>PART_OF</td>
              </tr>
              <tr valign="top">
                <td>CPR:2</td>
                <td>No</td>
                <td>REGULATOR&#124;DIRECT_REGULATOR&#124;INDIRECT_REGULATOR</td>
              </tr>
              <tr valign="top">
                <td>CPR:3</td>
                <td>Yes</td>
                <td>UPREGULATOR&#124;ACTIVATOR&#124;INDIRECT_UPREGULATOR</td>
              </tr>
              <tr valign="top">
                <td>CPR:4</td>
                <td>Yes</td>
                <td>DOWNREGULATOR&#124;INHIBITOR&#124;INDIRECT_DOWNREGULATOR</td>
              </tr>
              <tr valign="top">
                <td>CPR:5</td>
                <td>Yes</td>
                <td>AGONIST&#124;AGONIST-ACTIVATOR&#124;AGONIST-INHIBITOR</td>
              </tr>
              <tr valign="top">
                <td>CPR:6</td>
                <td>Yes</td>
                <td>ANTAGONIST</td>
              </tr>
              <tr valign="top">
                <td>CPR:7</td>
                <td>No</td>
                <td>MODULATOR&#124;MODULATOR-ACTIVATOR&#124;MODULATOR-INHIBITOR</td>
              </tr>
              <tr valign="top">
                <td>CPR:8</td>
                <td>No</td>
                <td>COFACTOR</td>
              </tr>
              <tr valign="top">
                <td>CPR:9</td>
                <td>Yes</td>
                <td>SUBSTRATE&#124;PRODUCT_OF&#124;SUBSTRATE_PRODUCT_OF</td>
              </tr>
              <tr valign="top">
                <td>CPR:10</td>
                <td>No</td>
                <td>NOT</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Statistics of the ChemProt corpus.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Annotations</td>
                <td colspan="3">Data set</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Training, n</td>
                <td>Development, n</td>
                <td>Test, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Document</td>
                <td>1020</td>
                <td>612</td>
                <td>800</td>
              </tr>
              <tr valign="top">
                <td>Chemicals</td>
                <td>13,017</td>
                <td>8004</td>
                <td>10,810</td>
              </tr>
              <tr valign="top">
                <td>Proteins</td>
                <td>12,752</td>
                <td>7567</td>
                <td>10,019</td>
              </tr>
              <tr valign="top">
                <td>CPR<sup>a</sup>:3</td>
                <td>768</td>
                <td>550</td>
                <td>665</td>
              </tr>
              <tr valign="top">
                <td>CPR:4</td>
                <td>2254</td>
                <td>1094</td>
                <td>1661</td>
              </tr>
              <tr valign="top">
                <td>CPR:5</td>
                <td>173</td>
                <td>116</td>
                <td>195</td>
              </tr>
              <tr valign="top">
                <td>CPR:6</td>
                <td>235</td>
                <td>199</td>
                <td>293</td>
              </tr>
              <tr valign="top">
                <td>CPR:9</td>
                <td>727</td>
                <td>457</td>
                <td>644</td>
              </tr>
              <tr valign="top">
                <td>Evaluated CPIs<sup>b</sup></td>
                <td>4157</td>
                <td>2416</td>
                <td>3458</td>
              </tr>
              <tr valign="top">
                <td>Evaluated CPIs in one sentence</td>
                <td>4122</td>
                <td>2412</td>
                <td>3444</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>CPR: chemical-protein relation.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>CPI: chemical-protein interaction.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Illustrative examples of chemical-protein relation (CPR) classes.</p>
          </caption>
          <graphic xlink:href="medinform_v8i5e17643_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Experimental Settings</title>
        <p>In this work, FastText [<xref ref-type="bibr" rid="ref23">23</xref>] was used to pretrain word embedding on the ChemProt corpus. Before the experiments, we set the range of parameters based on experience, then tuned the parameters on the development set by using grid search to determine the optimal parameters, and finally selected the best model of parameters that were optimal for evaluation on the test set. Without overfitting, the best model generally can achieve the best performance (the highest F-score) on the development set. The detailed tune range and hyperparameter values are listed in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
      </sec>
      <sec>
        <title>Comparison of Different Pruning Distances</title>
        <p>To obtain the best pruning distance, we experimented with <italic>N</italic>∈{0,1,2,3,∞} on the ChemProt corpus—<italic>N</italic>=0 corresponds to pruning the tree down to the path; <italic>N</italic>=1 keeps all nodes that are directly attached to the path; <italic>N</italic>=2,3 means holding words up to distance 2 and 3 away from the dependency path in the LCA subtree; and <italic>N</italic>=∞ retains the entire LCA subtree.</p>
        <p>As shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>, the performance of our model reaches its peak and outperforms other pruning distances at <italic>N</italic>=2. This confirms that pruning too aggressively (<italic>N</italic>=0,1) could lead to a loss of crucial information, while retaining too many irrelevant words (<italic>N</italic>=3) also decreases model performance due to the interference of irrelevant information. When <italic>N</italic>=2, the model achieves the best balance between retaining relevant information and excluding irrelevant information.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Hyperparameter setting.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="300"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Hyperparameter</td>
                <td>Tuned range</td>
                <td>Optimal</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Word embedding dimension</td>
                <td>[100,200,300]</td>
                <td>200</td>
              </tr>
              <tr valign="top">
                <td>POS<sup>a</sup> embedding dimension</td>
                <td>[10,20,30,40]</td>
                <td>20</td>
              </tr>
              <tr valign="top">
                <td>Entity type embedding dimension</td>
                <td>[40,50,60,70,80]</td>
                <td>60</td>
              </tr>
              <tr valign="top">
                <td>GCN<sup>b</sup> hidden units</td>
                <td>[100,200,300]</td>
                <td>200</td>
              </tr>
              <tr valign="top">
                <td>LSTM<sup>c</sup> hidden units</td>
                <td>[100,200,300]</td>
                <td>200</td>
              </tr>
              <tr valign="top">
                <td>Learning rate</td>
                <td>[0.1,0.2,0.3,0.4]</td>
                <td>0.3</td>
              </tr>
              <tr valign="top">
                <td>Dropout rate</td>
                <td>[0.4,0.5,0.6]</td>
                <td>0.5</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>POS: part-of-speech.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>GCN: graph convolutional network.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>LSTM: long short-term memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Comparison of different pruning distances.</p>
          </caption>
          <graphic xlink:href="medinform_v8i5e17643_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Comparison of Different Embedding Features</title>
        <p><xref ref-type="table" rid="table4">Table 4</xref> shows the effectiveness of different embedding features, including word embedding, entity type embedding, and POS embedding. The model achieves an F-score of 59.56% when only using word embedding. When POS and word embedding are combined, the F-score increases to 60.69%. When the entity type and word embedding are combined, the F-score increases to 62.52% (an increase of 2.96%). Furthermore, when both entity type and POS embedding are integrated with word embedding, the F-score improves to 65.17%. The results suggest that the main contributor to performance is prior knowledge of the entity type. This confirms the validity of the entity type in CPI extraction. The POS embedding is also valuable to the model.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Performance evaluation of different embedding features.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Embedding feature</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F-score (%)</td>
                <td>Δ (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Word</td>
                <td>57.64</td>
                <td>61.62</td>
                <td>59.56</td>
                <td>—<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>Word+POS<sup>b</sup></td>
                <td>58.49</td>
                <td>63.06</td>
                <td>60.69</td>
                <td>+1.13</td>
              </tr>
              <tr valign="top">
                <td>Word+Entity type</td>
                <td>64.06</td>
                <td>61.05</td>
                <td>62.52</td>
                <td>+2.96</td>
              </tr>
              <tr valign="top">
                <td>Word+POS+Entity type</td>
                <td>63.79</td>
                <td>66.62</td>
                <td>65.17</td>
                <td>+5.61</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Not applicable.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>POS: part-of-speech.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Comparison With the Baseline Method</title>
        <p>Different single models and their ensemble models are compared with each other in this section. As shown in <xref ref-type="table" rid="table5">Table 5</xref>, all ensemble models perform better than all single models, and the Bi-LSTM+GCN model performs better than the Bi-LSTM+CNN model. The results indicate that ensemble models can generally capture more information than single models. In terms of overall performance, the precision, recall, and F-score of the Bi-LSTM+GCN model are higher than those of the Bi-LSTM+CNN model. Our model can fully capture the overall information of the sentence by combining sequence structure information and syntactic information, while the Bi-LSTM+CNN model could only obtain sequence structure information, which confirms the effectiveness of the GCN model in CPI extraction.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Comparison with the baseline method.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="350"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Model</td>
                <td colspan="2">Precision (%)</td>
                <td colspan="2">Recall (%)</td>
                <td>F-score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Single models</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CNN<sup>a</sup></td>
                <td colspan="2">42.47</td>
                <td colspan="2">69.43</td>
                <td colspan="2">52.70</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GCN<sup>b</sup></td>
                <td colspan="2">48.77</td>
                <td colspan="2">63.69</td>
                <td colspan="2">55.24</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bi-LSTM<sup>c</sup></td>
                <td colspan="2">60.59</td>
                <td colspan="2">60.34</td>
                <td colspan="2">60.46</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Ensemble models</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bi-LSTM+CNN</td>
                <td colspan="2">57.77</td>
                <td colspan="2">64.73</td>
                <td colspan="2">61.05</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bi-LSTM+GCN (our model)</td>
                <td colspan="2">63.79</td>
                <td colspan="2">66.62</td>
                <td colspan="2">65.17</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>CNN: convolutional neural network.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>GCN: graph convolutional network.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>Bi-LSTM: bi-directional long short-term memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>The experimental results suggest that our model can effectively extract CPIs; it is better at learning semantic and syntactic information from sentences compared to other models. Additionally, the pruning strategy can alleviate the influence of irrelevant words in long sentences in biomedical literature, by only retaining tokens up to distance <italic>N</italic> away from the dependency path in the LCA subtree.</p>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>A comparison of our model with other existing methods on the ChemProt corpus is shown in <xref ref-type="table" rid="table6">Table 6</xref>. It can be found that neural network–based methods perform better than traditional machine learning–based methods, and our method achieves the highest F-score of 65.17%.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Comparison with other existing methods.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="250"/>
            <col width="250"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F-score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Verga et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td>
                <td>48.00</td>
                <td>54.10</td>
                <td>50.80</td>
              </tr>
              <tr valign="top">
                <td>Matos [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                <td>57.38</td>
                <td>47.22</td>
                <td>51.81</td>
              </tr>
              <tr valign="top">
                <td>Liu et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td>
                <td>57.4</td>
                <td>48.7</td>
                <td>52.7</td>
              </tr>
              <tr valign="top">
                <td>Lung et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td>
                <td>63.52</td>
                <td>51.21</td>
                <td>56.71</td>
              </tr>
              <tr valign="top">
                <td>Corbett and Boyle [<xref ref-type="bibr" rid="ref13">13</xref>]</td>
                <td>62.97</td>
                <td>62.20</td>
                <td>62.58</td>
              </tr>
              <tr valign="top">
                <td>Mehryary et al [<xref ref-type="bibr" rid="ref9">9</xref>]</td>
                <td>59.05</td>
                <td>67.76</td>
                <td>63.10</td>
              </tr>
              <tr valign="top">
                <td>Peng et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td>
                <td>72.66</td>
                <td>57.35</td>
                <td>64.10</td>
              </tr>
              <tr valign="top">
                <td>Our model</td>
                <td>63.79</td>
                <td>66.62</td>
                <td>65.17</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Lung et al [<xref ref-type="bibr" rid="ref30">30</xref>] used machine learning methods to integrate the semantic and dependency graph features through a three-stage model. They achieved an F-score of 56.71%. Similarly, Corbett and Boyle [<xref ref-type="bibr" rid="ref13">13</xref>] used pretrained LSTM and Bi-LSTM to extract CPIs in two stages and achieved a higher F-score of 61.5%. A particular feature of their system was the usage of unlabeled data both to pretrain word embedding and pretrain LSTM layers in the neural network.</p>
        <p>Verga et al [<xref ref-type="bibr" rid="ref12">12</xref>] applied attention mechanisms in their model. They synthesized convolutions and self-attention to extract CPIs. Liu et al [<xref ref-type="bibr" rid="ref11">11</xref>] achieved an F-score of 52.7% by synthesizing GRU and attention pooling. The results of word-level attention weights in the model of Liu et al [<xref ref-type="bibr" rid="ref11">11</xref>] showed that attention mechanism is effective in selecting the most important trigger words when trained with semantic relation labels without the need of semantic parsing and feature engineering.</p>
        <p>Mehryary et al [<xref ref-type="bibr" rid="ref9">9</xref>] employed an ensemble system that combined the results of SVM and LSTM, and they achieved a competitive result. Peng et al [<xref ref-type="bibr" rid="ref14">14</xref>] utilized more external features. They stacked SVM, CNN, and RNN models, and combined the outputs of the three systems by either majority voting or stacking. They achieved the best F-score of 64.10% in the BioCreative VI ChemProt shared task. Our model synthesized Bi-LSTM and GCN and achieved an improvement of 1.07% in F-score over the system of Peng et al [<xref ref-type="bibr" rid="ref14">14</xref>]. We further performed significance tests with <italic>P</italic>&#60;.05 indicating significance. The <italic>P</italic> value of Peng et al [<xref ref-type="bibr" rid="ref14">14</xref>] and our model is less than .001. It indicates that the improvement of 1.07% in F-score is significant.</p>
      </sec>
      <sec>
        <title>Results Analysis</title>
        <p>The experimental results indicate that the GCN module is valuable in CPI extraction. It can extract CPIs from biomedical texts with syntactic graph representations. It might also be efficient in other biomedical tasks by utilizing the sentence parse structure. By comparing different pruning distances, we revealed that the length of a sentence also plays an important role in relation extraction. The noisy words that are irrelevant to relations might hamper the performance of the extractor.</p>
        <p>GCNs can learn effective representations for relation extraction. However, a single GCN model could not capture the contextual information of word order. Additionally, GCN highly depends on correct parse trees to extract information from sentences, while existing parsing algorithms produce imperfect trees in many cases. To resolve these issues and improve the robustness of our model, we applied Bi-LSTM to generate contextualized representations and feed them into the GCN layer. The results confirm that the ensemble model of GCN and Bi-LSTM is effective for CPI extraction.</p>
      </sec>
      <sec>
        <title>Contributions</title>
        <p>The model we proposed in this paper aims to extract CPI and achieve state-of-the-art performance on the ChemProt corpus. Our main contributions are as follows.</p>
        <p>We proposed a novel neural model based on a GCN for CPI extraction, which can capture long-range syntactic information by utilizing the dependency structure of the input sentence. To improve the robustness, we applied a path-centric pruning strategy to remove irrelevant words without damaging crucial content on the dependency trees. Through the pruning strategy, the influence of noisy words can be reduced, thereby further improving the performance of the model. Furthermore, a Bi-LSTM layer is utilized to better leverage local word patterns regardless of parsing quality.</p>
        <p>Our model can automatically extract CPIs from a large amount of biomedical literature, which can save significant labor force and resources. Abundant biological entity relations can deliver useful chemicals for some diseases and save time by optimizing the drug development cycle, thereby helping pharmacists discover drugs. Furthermore, the knowledge graph generally contains rich, structured knowledge and has been widely used in natural language processing applications, such as search engines and question answering systems. However, the rapidly increasing volume of information requires refinement in the coverage of knowledge graphs. CPI extraction can help researchers to efficiently acquire biomedical knowledge, which can enrich the information needed for knowledge graph construction.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We proposed a novel model based on a GCN to extract CPI. The GCN module can encode syntactic information over the dependency graphs of input sentences. To reduce the impact of noisy words, our model only retains tokens that are up to a distance of N=2 away from the dependency path in the LCA subtree. Additionally, it applies Bi-LSTM to generate a contextualized representation and feed it into the GCN layer to resolve parsing errors and improve the robustness of the model. The experimental results demonstrated that our model achieves state-of-the-art performance. We plan to further improve our model and apply our method to extract other biomedical relation entity pairs.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">Bi-LSTM</term>
          <def>
            <p>bi-directional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CPI</term>
          <def>
            <p>chemical-protein interaction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CPR</term>
          <def>
            <p>chemical-protein relation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">FFNN</term>
          <def>
            <p>feed-forward neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">GCN</term>
          <def>
            <p>graph convolutional network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">GRU</term>
          <def>
            <p>gated recurrent units</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">LCA</term>
          <def>
            <p>lowest common ancestor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">MLP</term>
          <def>
            <p>multilayer perceptron</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PMID</term>
          <def>
            <p>PubMed Unique Identifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">POS</term>
          <def>
            <p>part-of-speech</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">ReLU</term>
          <def>
            <p>rectified linear unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We appreciate the valuable feedback provided by three anonymous reviewers. EW carried out the overall algorithm design and experiments as well as the writing of the manuscript. FW, ZY, LW, YZ, HL, and JW contributed to the algorithm design and the writing of the manuscript. All authors read and approved the final manuscript. This work was supported by a grant from the National Key Research and Development Program of China (#2016YFC0901902).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kringelum</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kjaerulff</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lund</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Oprea</surname>
              <given-names>TI</given-names>
            </name>
            <name name-style="western">
              <surname>Taboureau</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>ChemProt-3.0: a global chemical biology diseases mapping</article-title>
          <source>Database (Oxford)</source>
          <year>2016</year>
          <month>02</month>
          <day>13</day>
          <volume>2016</volume>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/bav123"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bav123</pub-id>
          <pub-id pub-id-type="medline">26876982</pub-id>
          <pub-id pub-id-type="pii">bav123</pub-id>
          <pub-id pub-id-type="pmcid">PMC4752971</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Airola</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pyysalo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Björne</surname>
              <given-names>Jari</given-names>
            </name>
            <name name-style="western">
              <surname>Pahikkala</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ginter</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Salakoski</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>All-paths graph kernel for protein-protein interaction extraction with evaluation of cross-corpus learning</article-title>
          <source>BMC Bioinformatics</source>
          <year>2008</year>
          <month>11</month>
          <day>19</day>
          <volume>9 Suppl 11</volume>
          <issue>S11</issue>
          <fpage>S2</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-S11-S2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-9-S11-S2</pub-id>
          <pub-id pub-id-type="medline">19025688</pub-id>
          <pub-id pub-id-type="pii">1471-2105-9-S11-S2</pub-id>
          <pub-id pub-id-type="pmcid">PMC2586751</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sætre</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Miyao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A rich feature vector for protein-protein interaction extraction from multiple corpora</article-title>
          <source>Association for Computational Linguistics</source>
          <year>2009</year>
          <conf-name>Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing: Volume 1-Volume 1</conf-name>
          <conf-date>2009</conf-date>
          <conf-loc>Tokyo</conf-loc>
          <fpage>121</fpage>
          <lpage>130</lpage>
          <pub-id pub-id-type="doi">10.3115/1699510.1699527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Walk-weighted subsequence kernels for protein-protein interaction extraction</article-title>
          <source>BMC Bioinformatics</source>
          <year>2010</year>
          <month>2</month>
          <day>25</day>
          <volume>11</volume>
          <issue>1</issue>
          <pub-id pub-id-type="doi">10.1186/1471-2105-11-107</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A Single Kernel-Based Approach to Extract Drug-Drug Interactions from Biomedical Literature</article-title>
          <source>PLoS ONE</source>
          <year>2012</year>
          <month>11</month>
          <day>1</day>
          <volume>7</volume>
          <issue>11</issue>
          <fpage>e48901</fpage>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0048901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Segura-Bedmar</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Martínez</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Herrero-Zazo</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Lessons learnt from the DDIExtraction-2013 Shared Task</article-title>
          <source>Journal of Biomedical Informatics</source>
          <year>2014</year>
          <month>10</month>
          <volume>51</volume>
          <fpage>152</fpage>
          <lpage>164</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.05.007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krallinger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rabal</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lourenço</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oyarzabal</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Valencia</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Information Retrieval and Text Mining Technologies for Chemistry</article-title>
          <source>Chem. Rev</source>
          <year>2017</year>
          <month>05</month>
          <day>05</day>
          <volume>117</volume>
          <issue>12</issue>
          <fpage>7673</fpage>
          <lpage>7761</lpage>
          <pub-id pub-id-type="doi">10.1021/acs.chemrev.6b00851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krallinger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rabal</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Akhondi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Overview of the BioCreative VI chemical-protein interaction track</article-title>
          <year>2017</year>
          <conf-name>Proceedings of the sixth BioCreative challenge evaluation workshop</conf-name>
          <conf-date>2017</conf-date>
          <conf-loc>Bethesda, MD, USA</conf-loc>
          <fpage>141</fpage>
          <lpage>146</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mehryary</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Björne</surname>
              <given-names>Jari</given-names>
            </name>
            <name name-style="western">
              <surname>Salakoski</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ginter</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Potent pairing: ensemble of long short-term memory networks and support vector machine for chemical-protein relation extraction</article-title>
          <source>Database (Oxford)</source>
          <year>2018</year>
          <month>01</month>
          <day>01</day>
          <volume>2018</volume>
          <fpage>1</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/bay120"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bay120</pub-id>
          <pub-id pub-id-type="medline">30576487</pub-id>
          <pub-id pub-id-type="pii">5255148</pub-id>
          <pub-id pub-id-type="pmcid">PMC6310522</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Warikoo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>LPTK: a linguistic pattern-aware dependency tree kernel approach for the BioCreative VI CHEMPROT task</article-title>
          <source>Database (Oxford)</source>
          <year>2018</year>
          <month>01</month>
          <day>01</day>
          <volume>2018</volume>
          <fpage>1</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/bay108"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bay108</pub-id>
          <pub-id pub-id-type="medline">30346607</pub-id>
          <pub-id pub-id-type="pii">5139652</pub-id>
          <pub-id pub-id-type="pmcid">PMC6196310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Komandur Elayavilli</surname>
              <given-names>Ravikumar</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Yanshan</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>Majid</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhary</surname>
              <given-names>Vipin</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Extracting chemical-protein relations using attention-based neural networks</article-title>
          <source>Database (Oxford)</source>
          <year>2018</year>
          <month>1</month>
          <day>12</day>
          <volume>2018</volume>
          <fpage>1</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/bay102"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bay102</pub-id>
          <pub-id pub-id-type="medline">30295724</pub-id>
          <pub-id pub-id-type="pii">5122756</pub-id>
          <pub-id pub-id-type="pmcid">PMC6174551</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verga</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Strubell</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Simultaneously self-attending to all mentions for full-abstract biological relation extraction</article-title>
          <year>2018</year>
          <conf-name>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)</conf-name>
          <conf-date>2018</conf-date>
          <conf-loc>New Orleans, Louisiana</conf-loc>
          <fpage>872</fpage>
          <lpage>884</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corbett</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Boyle</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Improving the learning of chemical-protein interactions from literature using transfer learning and specialized word embeddings</article-title>
          <source>Database (Oxford)</source>
          <year>2018</year>
          <month>1</month>
          <day>10</day>
          <volume>2018</volume>
          <fpage>1</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/bay066"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bay066</pub-id>
          <pub-id pub-id-type="medline">30010749</pub-id>
          <pub-id pub-id-type="pii">5053190</pub-id>
          <pub-id pub-id-type="pmcid">PMC6044291</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rios</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kavuluru</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Extracting chemical-protein relations with ensembles of SVM and deep learning models</article-title>
          <source>Database (Oxford)</source>
          <year>2018</year>
          <month>01</month>
          <day>09</day>
          <volume>2018</volume>
          <fpage>1</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/bay073"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bay073</pub-id>
          <pub-id pub-id-type="medline">30020437</pub-id>
          <pub-id pub-id-type="pii">5055578</pub-id>
          <pub-id pub-id-type="pmcid">PMC6051439</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yüksel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Öztürk</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ozkirimli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Özgür</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>CNN-based chemical-protein interactions classification</article-title>
          <year>2017</year>
          <conf-name>Proceedings of the BioCreative VI Workshop</conf-name>
          <conf-date>2017</conf-date>
          <conf-loc>Boğaziçi University, İstanbul, Turkey</conf-loc>
          <fpage>184</fpage>
          <lpage>186</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long Short-Term Memory</article-title>
          <source>Neural Computation</source>
          <year>1997</year>
          <month>11</month>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>1780</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Merrienboer</surname>
              <given-names>Bv</given-names>
            </name>
            <name name-style="western">
              <surname>Gulcehre</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bahdanau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bougares</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schwenk</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Learning phrase representations using RNN encoder–decoder for statistical machine translation (arXiv:1406.1078)</article-title>
          <source>arXiv.org</source>
          <year>2014</year>
          <fpage>1724</fpage>
          <lpage>1734</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1406.1078.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kipf</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Welling</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Semi-Supervised Classification with Graph Convolutional Networks (arXiv:1609.02907)</article-title>
          <source>arXiv.org</source>
          <year>2017</year>
          <month>02</month>
          <day>22</day>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1609.02907.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marcheggiani</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Titov</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Encoding sentences with graph convolutional networks for semantic role labeling (arXiv:1703.04826)</article-title>
          <source>arXiv.org</source>
          <year>2017</year>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1703.04826.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Graph Convolution over Pruned Dependency Trees Improves Relation Extraction</article-title>
          <year>2018</year>
          <conf-name>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>2018</conf-date>
          <conf-loc>Brussels, Belgium</conf-loc>
          <fpage>2205</fpage>
          <lpage>2215</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dblp.uni-trier.de/db/conf/emnlp/emnlp2018.html#Zhang0M18"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bansal</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>End-to-end relation extraction using LSTMs on sequences and tree structures (arXiv:1601.00770)</article-title>
          <source>arXiv preprint</source>
          <year>2016</year>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1601.00770.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Surdeanu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Finkel</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Bethard</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McClosky</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The Stanford CoreNLP natural language processing toolkit</article-title>
          <year>2014</year>
          <conf-name>Proceedings of 52nd annual meeting of the association for computational linguistics: system demonstrations</conf-name>
          <conf-date>June 23-24, 2014</conf-date>
          <conf-loc>Baltimore, Maryland USA</conf-loc>
          <fpage>55</fpage>
          <lpage>60</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bojanowski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Enriching Word Vectors with Subword Information</article-title>
          <source>Transactions of the Association for Computational Linguistics</source>
          <year>2017</year>
          <volume>5</volume>
          <fpage>135</fpage>
          <lpage>146</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00051</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Attention Guided Graph Convolutional Networks for Relation Extraction</article-title>
          <year>2019</year>
          <conf-name>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <fpage>241</fpage>
          <lpage>251</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dblp.uni-trier.de/db/conf/acl/acl2019-1.html#GuoZL19"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sahu</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Christopoulou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Inter-sentence Relation Extraction with Document-level Graph Convolutional Neural Network (arXiv:1906.04684)</article-title>
          <source>arXiv.org</source>
          <year>2019</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1906.04684.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Long-tail relation extraction via knowledge graph embeddings and graph convolution networks (arXiv:1903.01306)</article-title>
          <source>arXiv.org</source>
          <year>2019</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1903.01306.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>End-to-end neural coreference resolution (arXiv:1707.07045)</article-title>
          <source>arXiv.org</source>
          <year>2017</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1707.07045.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Santoro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Raposo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Barrett</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Malinowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pascanu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Battaglia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lillicrap</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>A simple neural network module for relational reasoning</article-title>
          <year>2017</year>
          <conf-name>Advances in Neural Information Processing Systems</conf-name>
          <conf-date>2017</conf-date>
          <conf-loc>Long Beach, CA, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Matos</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Extracting chemical–protein interactions using long short-term memory networks</article-title>
          <year>2017</year>
          <conf-name>Proceedings of the BioCreative VI Workshop</conf-name>
          <conf-date>18-20 October 2017</conf-date>
          <conf-loc>Bethesda</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lung</surname>
              <given-names>PY</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Extracting chemical-protein interactions from literature</article-title>
          <year>2017</year>
          <conf-name>Proceedings of the BioCreative VI Workshop</conf-name>
          <conf-date>18-20 October 2017</conf-date>
          <conf-loc>Bethesda</conf-loc>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
