<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i6e37804</article-id>
      <article-id pub-id-type="pmid">35671070</article-id>
      <article-id pub-id-type="doi">10.2196/37804</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Conditional Probability Joint Extraction of Nested Biomedical Events: Design of a Unified Extraction Framework Based on Neural Networks</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhang</surname>
            <given-names>Tongxuan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>An</surname>
            <given-names>Yang</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Yan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1036-9365</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Jian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>School of Computer Science and Technology</institution>
            <institution>Dalian University of Technology</institution>
            <addr-line>No 2 Linggong Road</addr-line>
            <addr-line>Dalian, 116024</addr-line>
            <country>China</country>
            <phone>86 13604119266</phone>
            <email>wangjian@dlut.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4656-7446</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>Huiyi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4384-4636</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Bing</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1217-4169</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Yijia</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5843-4675</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Banbhrani</surname>
            <given-names>Santosh Kumar</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2573-9162</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>Hongfei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0872-7688</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Computer Science and Technology</institution>
        <institution>Dalian University of Technology</institution>
        <addr-line>Dalian</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Pharmacy</institution>
        <institution>The Second Affiliated Hospital of Dalian Medical University</institution>
        <addr-line>Dalian</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Information Science and Technology</institution>
        <institution>Dalian Maritime University</institution>
        <addr-line>Dalian</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jian Wang <email>wangjian@dlut.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>7</day>
        <month>6</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>6</issue>
      <elocation-id>e37804</elocation-id>
      <history>
        <date date-type="received">
          <day>8</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>6</day>
          <month>4</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>15</day>
          <month>4</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>4</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Yan Wang, Jian Wang, Huiyi Lu, Bing Xu, Yijia Zhang, Santosh Kumar Banbhrani, Hongfei Lin. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 07.06.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/6/e37804" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Event extraction is essential for natural language processing. In the biomedical field, the nested event phenomenon (event A as a participating role of event B) makes extracting this event more difficult than extracting a single event. Therefore, the performance of nested biomedical events is always underwhelming. In addition, previous works relied on a pipeline to build an event extraction model, which ignored the dependence between trigger recognition and event argument detection tasks and produced significant cascading errors.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to design a unified framework to jointly train biomedical event triggers and arguments and improve the performance of extracting nested biomedical events.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We proposed an end-to-end joint extraction model that considers the probability distribution of triggers to alleviate cascading errors. Moreover, we integrated the syntactic structure into an attention-based gate graph convolutional network to capture potential interrelations between triggers and related entities, which improved the performance of extracting nested biomedical events.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The experimental results demonstrated that our proposed method achieved the best F1 score on the multilevel event extraction (MLEE) biomedical corpus and achieved a favorable performance on the biomedical natural language processing shared task 2011 Genia event corpus.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our conditional probability joint extraction model is good at extracting nested biomedical events because of the joint extraction mechanism and the syntax graph structure. Moreover, as our model did not rely on external knowledge and specific feature engineering, it had a particular generalization performance.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>nested biomedical event</kwd>
        <kwd>joint extraction</kwd>
        <kwd>graph convolutional network</kwd>
        <kwd>GCN</kwd>
        <kwd>Dice loss</kwd>
        <kwd>syntactic structure</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In recent years, event extraction research has attracted wide attention, especially in biomedical event extraction, which is critical for understanding the biomolecular interactions described in the scientific corpus. Events are important concepts in the field of information extraction. However, researchers have different definitions of events, based on different research purposes and perspectives. In the general domain, an event is a specific thing that describes a state change involving different participants, such as the evaluation of automatic content extraction, in which 8 categories and 33 subcategories of events are defined in a hierarchical structure, and each type of event contains a different semantic role. In the biomedical field, McDonald et al [<xref ref-type="bibr" rid="ref1">1</xref>] defined event extraction as multirelationship extraction, the purpose of which was to extract semantic role information between different entities in an event. For example, the biomedical natural language processing (BioNLP) evaluation task defined 9 different categories of biochemical events. Each event included an event trigger and at least one event argument, and the different event types had different semantic roles. Unlike the events in automatic content extraction, biomedical events may have nested event phenomena.</p>
        <p>To clearly describe the progress of biomedical event extraction, we defined 4 concepts for biomedical events, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref> and <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Basic progress of biomedical event extraction, where yellow boxes represent the type of entity and the blue boxes represent the type of trigger. Theme and cause represent the relationship between participant and event, namely, argument detection. IL-8: interleukin 8; TNF-alpha: tumor necrosis factor alpha.</p>
          </caption>
          <graphic xlink:href="medinform_v10i6e37804_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <boxed-text id="box1" position="float">
          <title>Concepts for biomedical events.</title>
          <p>
            <bold>Event type</bold>
          </p>
          <p>The semantic type of different events</p>
          <p>
            <bold>Event description</bold>
          </p>
          <p>A complete sentence or clause in the text that specifically describes at least one event</p>
          <p>
            <bold>Event trigger</bold>
          </p>
          <p>A word or phrase representing the occurrence of an event in the event description; usually of a <italic>verb</italic> or <italic>nonverb</italic> nature, and its category is event type; it should be noted that each event has only 1 event trigger.</p>
          <p>
            <bold>Event argument</bold>
          </p>
          <p>The event participants describe the different semantic roles in the event, whose type represents the relationship between the event and related participants; in the biomedical event system, there are 6 different semantic roles, where “theme” and “cause” are core arguments.</p>
        </boxed-text>
        <p>The task of event extraction comprises 3 subtasks: named entity recognition, trigger recognition, and event argument detection. Previous studies have relied on pipeline methods [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref5">5</xref>] to extract biomedical events. For example, given the event description (a sentence) shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, the event extraction system can find 2 entities (“TNF-alpha” and “IL-8”) in this sentence at the named entity recognition step. After recognizing triggers, it can identify a <italic>positive regulation</italic> (“Pos_Reg”) event mention triggered by a word <italic>activator</italic> and an <italic>expression</italic> (“Exp”) event mention triggered by a word <italic>expression</italic>. On the basis of the recognized entities and triggers, the system detects arguments and associates them with the related event triggers. Thus, the entity “TNF-alpha” is a participant in the <italic>positive regulation</italic> event, and the entity “IL-8” is a participant in the <italic>expression</italic> event. As the result of the previous step is the input of the subsequent step, the pipeline methods probably introduce cascading errors if the precision of the previous step is biased.</p>
        <p>As the syntactic dependency tree enriches the feature representation, previous studies tended to use syntactic relations to improve the performance of event extraction. For example, Kilicoglu et al [<xref ref-type="bibr" rid="ref2">2</xref>] leveraged external tools to segment sentences, annotate parts of speech (POS), and parse syntactic dependency. Then, they joined these features to extract biomedical events using a dictionary and rules. Björne et al [<xref ref-type="bibr" rid="ref4">4</xref>] transferred the syntactic relations to the path embeddings, then combined them with word embeddings, POS embeddings, entity embeddings, distance embeddings, and relative position embeddings to feed into the convolutional neural network (CNN) model to extract biomedical events. However, the previous studies only adopted syntactic relations as the external features and ignored the interrelations between triggers and related entities obtained from the syntactic dependency tree, which improved the performance of extracting simple events but not nested events.</p>
        <p>In this study, we mainly used the multilevel event extraction (MLEE) corpus [<xref ref-type="bibr" rid="ref6">6</xref>] and the BioNLP shared task (BioNLP-ST) 2011 Genia event (GE) corpus [<xref ref-type="bibr" rid="ref7">7</xref>] to evaluate our method. It should be noted that the MLEE corpus extends event extraction methods to the biomedical information field and covers all levels of biological organization, from molecules to entire organisms. The MLEE label scheme is the same as the BioNLP event system but has more abundant event types: 4 major categories (anatomical, molecular, general, and planned) and 19 subcategories. The specific information is shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Primary event types and argument roles in the multilevel event extraction corpus (N=6827).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="270"/>
            <col width="0"/>
            <col width="400"/>
            <col width="0"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Event and subevent types</td>
                <td colspan="2">Core arguments</td>
                <td>Values, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Anatomical</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cell proliferation</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">133 (2.42)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Development</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">316 (4.81)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Blood vessel development</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">855 (12.91)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Growth</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">469 (2.65)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Death</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">97 (1.53)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Breakdown</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">69 (1.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Remodeling</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">33 (0.45)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Molecular</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Synthesis</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">17 (0.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gene expression</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">435 (6.66)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Transcription</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">37 (0.61)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Catabolism</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">26 (0.39)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Phosphorylation</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">33 (0.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Dephosphorylation</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">6 (0.09)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>General</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Localization</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">450 (6.87)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Binding</td>
                <td colspan="2">Theme (entity)</td>
                <td colspan="2">187 (2.92)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Regulation</td>
                <td colspan="2">Theme (entity or event) and cause (entity or event)</td>
                <td colspan="2">773 (11.81)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Positive regulation</td>
                <td colspan="2">Theme (entity or event) and cause (entity or event)</td>
                <td colspan="2">1327 (20.33)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Negative regulation</td>
                <td colspan="2">Theme (entity or event) and cause (entity or event)</td>
                <td colspan="2">921 (14.08)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Planned</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Planned process</td>
                <td colspan="2">Theme (entity or event)</td>
                <td colspan="2">643 (9.9)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>To abate the impact of cascading errors, we propose an end-to-end conditional probability joint extraction (CPJE) method that can effectively transmit trigger distribution information to the event argument detection task. To capture the interrelations between triggers and related entities and improve the performance of extracting nested biomedical events, we integrated the syntactic dependency tree into an attention-based gate graph convolutional network (GCN), which can capture the flow direction of the key information. The contributions of this study are as follows:</p>
        <list list-type="order">
          <list-item>
            <p>We propose an end-to-end CPJE framework, which effectively leverages trigger distribution information to enhance the performance of event argument detection and weakens cascading errors in the overall event extraction process.</p>
          </list-item>
          <list-item>
            <p>We used the syntactic dependency tree to capture the interrelations between triggers and related entities and integrated the tree into an attention-based gate GCN to extract nested biomedical events.</p>
          </list-item>
          <list-item>
            <p>We obtained state-of-the-art performance on the MLEE and BioNLP-ST 2011 GE corpora for extracting nested biomedical events.</p>
          </list-item>
        </list>
        <p>We summarize the current frameworks for event extraction tasks in the <italic>Related Works</italic> section. We introduce our framework in the <italic>Methods</italic> section. We display the overall performance in the <italic>Results</italic> section. We present the ablation study, visualization, and case study in the <italic>Discussion</italic> section. We summarize this work and discuss future research directions in the <italic>Conclusions</italic> section.</p>
      </sec>
      <sec>
        <title>Related Works</title>
        <p>The biomedical event extraction problem is similar to general domain event extraction and entity relationship extraction; therefore, we have many theoretical foundations and experimental methods that can be used for reference.</p>
        <sec>
          <title>Entity Relationship Extraction</title>
          <p>Biomedical events can be regarded as complex relationship extraction tasks, and relationship extraction methods have achieved excellent results in various fields. Therefore, we studied some relationship extraction methods to help conceive the construction of event extraction models. With the development of deep learning, an increasing number of researchers have used deep learning algorithms to achieve the joint extraction of entity relationships [<xref ref-type="bibr" rid="ref8">8</xref>]. To solve the problem of a sparse number of labeled samples, distant supervision methods have been applied to the relationship extraction task [<xref ref-type="bibr" rid="ref9">9</xref>]. Deep reinforcement learning (RL) algorithms have also been applied to the relationship extraction task to solve noisy data samples [<xref ref-type="bibr" rid="ref10">10</xref>]. In addition, with the widespread application of graph neural networks (GNNs), GCNs have been used in certain relation-extraction tasks [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>].</p>
        </sec>
        <sec>
          <title>General Domain Event Extraction</title>
          <p>In general, news event extraction is a research hot spot. Some methods have improved the performance of event extraction by studying feature engineering. Sentence-level feature extraction included combinational features of triggers and event arguments [<xref ref-type="bibr" rid="ref13">13</xref>] or combinational features of triggers and entity relationships [<xref ref-type="bibr" rid="ref14">14</xref>]. Document-level feature extraction included common information event extraction from multiple documents [<xref ref-type="bibr" rid="ref15">15</xref>] and joint event argument extraction based on latent-variable semi-Markov conditional random fields [<xref ref-type="bibr" rid="ref16">16</xref>]. Others have also used deep learning to reduce feature engineering, which improves a model’s generalization ability and extraction performance; for example, learning context-dependency information with recurrent neural networks [<xref ref-type="bibr" rid="ref17">17</xref>], detecting events with nonconsecutive CNNs [<xref ref-type="bibr" rid="ref18">18</xref>], and obtaining syntactic structure information with GCNs [<xref ref-type="bibr" rid="ref19">19</xref>]. All these methods have laid a better foundation for the extraction of biomedical events.</p>
        </sec>
        <sec>
          <title>Biomedical Event Extraction</title>
          <p>Extracting biomedical events is one of the BioNLP-STs [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Previous studies mainly explored human-engineered features based on a support vector machine model [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Owing to error transmission in the pipeline approach, Riedel et al [<xref ref-type="bibr" rid="ref26">26</xref>] developed a joint model with dual decomposition, and Venugopal et al [<xref ref-type="bibr" rid="ref27">27</xref>] leveraged Markov logic networks for joint inference. Recently, most studies have observed remarkable benefits of neural models. For example, some have started to add POS tags and syntactic parsing with different neural models [<xref ref-type="bibr" rid="ref28">28</xref>], improved the biomedical event extraction model using semisupervised frameworks [<xref ref-type="bibr" rid="ref29">29</xref>], attempted to use attention mechanisms to obtain the semantic relationship of biomedical texts [<xref ref-type="bibr" rid="ref5">5</xref>], and used distributed representations to obtain context embedding [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. To incorporate more information from the biomedical knowledge base (KB), Zhao et al [<xref ref-type="bibr" rid="ref32">32</xref>] leveraged a RL framework to extract biomedical events with representations from external biomedical KBs. Li et al [<xref ref-type="bibr" rid="ref33">33</xref>] fused gene ontology into tree long short-term memory (LSTM) models with distributional representations. Huang et al [<xref ref-type="bibr" rid="ref34">34</xref>] used a GNN to hierarchically emulate 2 knowledge-based views from the Unified Medical Language System with conceptual and semantic inference paths. 
Trieu et al [<xref ref-type="bibr" rid="ref35">35</xref>] used multiple overlapping, directed, acyclic graph structures to jointly extract biomedical entities, triggers, roles, and events. Zhao et al [<xref ref-type="bibr" rid="ref36">36</xref>] combined a dependency-based GCN with a hypergraph to jointly extract biomedical events. Ramponi et al [<xref ref-type="bibr" rid="ref37">37</xref>] proposed a joint end-to-end framework that regards biomedical event extraction as sequence labeling with a multilabel aware encoding strategy.</p>
          <p>Compared with these methods, our approach jointly extracts biomedical events with a probability distribution of triggers, which alleviates the cascading errors introduced by the pipeline methods. Moreover, considering the potential interrelations between triggers and related entities, our approach integrates the syntactic structure into an attention-based gate GCN to capture the flow direction of key information, which greatly improves the extraction performance for nested biomedical events. It is important to mention that our approach does not require any external resources to assist the biomedical event extraction task.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>This section illustrates the proposed CPJE model. Let <italic>W</italic>={<italic>w</italic><sub>1</sub>,<italic>w</italic><sub>2</sub>,...,<italic>w</italic><sub>n</sub>} be a sentence of length <italic>n</italic>, where <italic>w</italic><sub>i</sub> is the <italic>i</italic>th word in a sentence. Similarly, <italic>E</italic>={<italic>e</italic><sub>1</sub>,<italic>e</italic><sub>2</sub>,...,<italic>e</italic><sub>k</sub>} is a set of entities mentioned in a sentence, where <italic>k</italic> is the number of entities. As the trigger may comprise multiple tokens, we used the BIO tag scheme to annotate the trigger type of each token in the sentence. When we obtained the corresponding event trigger in the sentence, we used this information to predict the corresponding event arguments.</p>
        <p>As shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, our CPJE model mainly includes 3 layers: an input layer, an information extraction layer, and a joint extraction layer. The input layer converts unstructured text information (such as word sequences, syntactic structure trees, POS label representations, and entity label information) into a structured discrete representation and inputs it into the next layer. The information extraction layer converts discrete information into continuous feature representations, which deeply extracts the semantic and dependence information in a sentence. The joint extraction layer parses the previous fusion information and sends the parsed information into the trigger softmax classifier and event softmax classifier to jointly extract biomedical events.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The architecture of the conditional probability joint extraction framework, where numbers 0 to 9 represent each word in the sentence, the blue bar represents BioBERT embedding, the yellow bar represents POS-tagging embedding, and the green bar represents entity embedding. BERT: Bidirectional Encoder Representation From Transformers; BioBERT: Biomedical Bidirectional Encoder Representation From Transformers; B-BVD: B-blood vessel development; LSTM: long short-term memory; POS: parts of speech.</p>
          </caption>
          <graphic xlink:href="medinform_v10i6e37804_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Information Extraction Layer</title>
        <p>The input layer is not explained in detail, as it only performs a simple conversion of the text into a sequence of numbers. Each module of the information extraction layer is presented in the following sections.</p>
        <sec>
          <title>Word Representation</title>
          <p>In the word representation module, to improve the representation capability of the initial features, each word <italic>w</italic><sub>i</sub> in the sentence is transformed to a real-valued vector <italic>x</italic><sub>i</sub> by concatenating the embeddings described in the following sections.</p>
        </sec>
        <sec>
          <title>Biomedical Bidirectional Encoder Representation From Transformers Embedding</title>
          <p>We used the Biomedical Bidirectional Encoder Representation from Transformers (BioBERT) pretraining model [<xref ref-type="bibr" rid="ref38">38</xref>] to obtain the dynamic semantic representation of the word <italic>w</italic><sub>i</sub>. BioBERT embedding comprises token embedding, segment embedding, and position embedding, which are subsequently encoded by a multilayer bidirectional transformer. Thus, it includes rich semantic and positional information. Furthermore, it can solve the polysemy problem of words. We define <italic>a</italic><sub>i</sub> as the word vector representation of the word <italic>w</italic><sub>i</sub>.</p>
        </sec>
        <sec>
          <title>POS-Tagging Embedding</title>
          <p>We used a randomly initialized POS-tagging embedding table to obtain each POS-tagging vector. We defined <italic>b</italic><sub>i</sub> as the POS-tagging vector representation of the word <italic>w</italic><sub>i</sub>.</p>
        </sec>
        <sec>
          <title>Entity Label Embedding</title>
          <p>Similar to the POS-tagging embedding, we used the BIO label scheme to annotate the entities mentioned in the sentence and convert the entity type label into a real-value vector by consulting the embedding table. We defined <italic>c</italic><sub>i</sub> as the entity vector representation of the word <italic>w</italic><sub>i</sub>.</p>
          <p>The transformation from the token <italic>w</italic><sub>i</sub> to the vector <italic>x</italic><sub>i</sub> converts the input sentence <italic>W</italic> into a sequence of real-valued vectors <italic>X</italic>={<italic>x</italic><sub>1</sub>,<italic>x</italic><sub>2</sub>,...,<italic>x</italic><sub>n</sub>}, <inline-graphic xlink:href="medinform_v10i6e37804_fig7.png" xlink:type="simple" mimetype="image"/>, where <inline-graphic xlink:href="medinform_v10i6e37804_fig8.png" xlink:type="simple" mimetype="image"/> is the concatenation operation, <italic>x</italic><sub>i</sub> is the μ dimension (ie, the sum of the dimensions of <italic>a</italic><sub>i</sub>, <italic>b</italic><sub>i</sub>, and <italic>c</italic><sub>i</sub>), and <inline-graphic xlink:href="medinform_v10i6e37804_fig9.png" xlink:type="simple" mimetype="image"/>. <italic>X</italic> is fed into the subsequent blocks to obtain more valuable information for extracting biomedical events.</p>
        </sec>
        <sec>
          <title>Bidirectional LSTM</title>
          <p>To obtain the context information of the input text and avoid the gradient explosion problem caused by long texts, we chose the classic bidirectional LSTM (BiLSTM) structure to extract the context features of the word representations.</p>
          <p>We fed the word representation sequence <italic>X</italic>={<italic>x</italic><sub>1</sub>,<italic>x</italic><sub>2</sub>,...,<italic>x</italic><sub>n</sub>} into BiLSTM to obtain the forward hidden unit <italic>h</italic><sub>t</sub><sup>f</sup> and the backward hidden unit <italic>h</italic><sub>t</sub><sup>b</sup> with φ dimension in time <italic>t</italic> according to equation 1. We represented all the hidden states of the forward LSTM and backward LSTM as <inline-graphic xlink:href="medinform_v10i6e37804_fig10.png" xlink:type="simple" mimetype="image"/> and <inline-graphic xlink:href="medinform_v10i6e37804_fig11.png" xlink:type="simple" mimetype="image"/>, respectively, where <italic>n</italic> is the number of LSTM hidden units:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            <graphic xlink:href="medinform_v10i6e37804_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Finally, we concatenated these 2 matrices to obtain the context representation <inline-graphic xlink:href="medinform_v10i6e37804_fig14.png" xlink:type="simple" mimetype="image"/> of BiLSTM:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </sec>
        <sec>
          <title>Gate GCN</title>
          <p>To obtain the syntactic dependence in a sentence, we reference the method proposed by Liu et al [<xref ref-type="bibr" rid="ref19">19</xref>] to apply a gate GCN model to analyze the sentence-dependent features. We considered an undirected graph G=(<italic>V</italic>, ε) as a syntactic dependency tree for the sentence <italic>W</italic>, where <italic>V</italic> is the set of nodes and ε is the set of edges. Defining <inline-graphic xlink:href="medinform_v10i6e37804_fig16.png" xlink:type="simple" mimetype="image"/>, <italic>v</italic><sub>i</sub> represents each word <italic>w</italic><sub>i</sub> of sentence <italic>W</italic>, and each edge <inline-graphic xlink:href="medinform_v10i6e37804_fig17.png" xlink:type="simple" mimetype="image"/> represents a directed syntactic arc from word <italic>w</italic><sub>i</sub> to word <italic>w</italic><sub>j</sub>, with dependency type <italic>Re</italic>. In addition, to allow information to also flow in the reverse direction, we add the corresponding reversed edge (<italic>v</italic><sub>j</sub>, <italic>v</italic><sub>i</sub>) with dependency type <italic>Re′</italic> and self-loops (<italic>v</italic><sub>i</sub>, <italic>v</italic><sub>i</sub>) for any node <italic>v</italic><sub>i</sub>. According to statistics, we used the Stanford Parser [<xref ref-type="bibr" rid="ref39">39</xref>] to obtain approximately 50 different kinds of syntactic dependency. To facilitate the GCN internal calculation, we only considered the direction of information flow and simplified the original dependency into 3 forms, as shown in equation 4:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig18.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>For node <inline-graphic xlink:href="medinform_v10i6e37804_fig19.png" xlink:type="simple" mimetype="image"/>, we can use the hidden vector <italic>h</italic><sub>v</sub><sup>(j)</sup> in the <italic>j</italic>th gate GCN layer to compute the hidden vector <italic>h</italic><sub>v</sub><sup>(j+1)</sup> of the next layer:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig20.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>Re</italic>(<italic>u</italic>,<italic>v</italic>) is the dependency type between nodes <italic>u</italic> and <italic>v</italic>, <italic>W<sub>Re(u,v)</sub></italic><sup>(j)</sup> and <italic>b<sub>Re(u,v)</sub></italic><sup>(j)</sup> are the weight matrix and bias, respectively. <italic>N</italic> (<italic>v</italic>) is the set of neighbors of node <italic>v</italic>, including <italic>v</italic> itself (via the self-loop). The weight of edge (<italic>u</italic>, <italic>v</italic>) is <italic>g<sub>u,v</sub></italic><sup>(j)</sup>, which applies the gate to the edge to indicate the importance of the edge, as shown in equation 6:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig21.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Here, <italic>V<sub>Re(u,v)</sub></italic><sup>j</sup> and <italic>d<sub>Re(u,v)</sub></italic><sup>j</sup> are the gate weight matrix and bias, respectively. We used BioBERT embedding <italic>A</italic>={<italic>a</italic><sub>1</sub>,<italic>a</italic><sub>2</sub>,...,<italic>a</italic><sub>n</sub>} to initialize the input of the first GCN layer. Stacking <italic>k</italic> GCN layers can obtain a syntactic information matrix <inline-graphic xlink:href="medinform_v10i6e37804_fig22.png" xlink:type="simple" mimetype="image"/>, where <italic>m</italic> is the dimension of node <italic>v</italic><sub>i</sub> with the same dimension of <italic>a</italic><sub>i</sub>.</p>
        </sec>
        <sec>
          <title>Multi-Head Attention</title>
          <p>As shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, multi-head attention [<xref ref-type="bibr" rid="ref40">40</xref>] comprises <italic>H</italic> self-attentions, which can thoroughly learn the similarity between nodes and calculate the importance of each node so that the model can focus on more critical node features. Let <italic>W</italic><sub>i</sub><sup>Q</sup>, <italic>W</italic><sub>i</sub><sup>K</sup>, and <italic>W</italic><sub>i</sub><sup>V</sup> be the <italic>i</italic>th initialized weight matrix of <italic>Q</italic>, <italic>K,</italic> and <italic>V</italic>, known by equation 7:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig23.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Here, <inline-graphic xlink:href="medinform_v10i6e37804_fig24.png" xlink:type="simple" mimetype="image"/>, <inline-graphic xlink:href="medinform_v10i6e37804_fig25.png" xlink:type="simple" mimetype="image"/>, <inline-graphic xlink:href="medinform_v10i6e37804_fig26.png" xlink:type="simple" mimetype="image"/>, and <italic>d</italic><sub>k</sub>=<italic>d</italic><sub>v</sub>=<italic>m</italic>/<italic>H</italic>.</p>
          <p>We calculated the scoring matrix of the <italic>i</italic>th head according to equation 8. After concatenating <italic>H</italic> heads, we used equation 9 to obtain the attention output matrix <italic>M</italic>. <inline-graphic xlink:href="medinform_v10i6e37804_fig27.png" xlink:type="simple" mimetype="image"/> is the linear transformation matrix:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig28.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            <graphic xlink:href="medinform_v10i6e37804_fig29.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </sec>
      </sec>
      <sec>
        <title>Joint Extraction Layer</title>
        <sec>
          <title>Tagger</title>
          <p>The tagger comprises a unidirectional LSTM that takes as input the context representation given by the BiLSTM together with the syntactic dependency representation generated by the attention GCN module, to parse the information of the previous layer. Let <inline-graphic xlink:href="medinform_v10i6e37804_fig30.png" xlink:type="simple" mimetype="image"/>. After the tagger module, we obtained the output matrix <italic>O</italic>, which was sent to the conditional probability extraction module.</p>
        </sec>
        <sec>
          <title>Conditional Probability Extraction</title>
          <p>Most joint extraction models input the same source information into different subtask classifiers simultaneously to achieve information sharing, as shown in equation 10, where <inline-graphic xlink:href="medinform_v10i6e37804_fig31.png" xlink:type="simple" mimetype="image"/> is the output of the trigger in time step <italic>i</italic> and <inline-graphic xlink:href="medinform_v10i6e37804_fig32.png" xlink:type="simple" mimetype="image"/> is the output of the argument in step <italic>j</italic>.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig33.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>However, when the occurrence frequency of 2 subtasks in the same data set varies significantly, the model easily focuses on high-frequency subtasks and ignores low-frequency subtasks. Similar to the biomedical event extraction task, for the trigger recognition and event argument detection subtasks, each event trigger (ie, biomedical event) may contain 0, 1, or 2 participating elements, and the participating element may also be another event; therefore, the contribution of the trigger recognition task will be greater than that of the event argument detection task. To alleviate the abovementioned problems and reduce the cascading errors between these 2 subtasks, we combined the softmax output after trigger recognition and the source information to extract the trigger vector <italic>Tr</italic><sub>i</sub> and event argument vector <italic>Can</italic><sub>j</sub> according to the location of triggers and candidate arguments. Finally, by aggregating and inputting them into the event extraction classifier and learning the distribution features of the trigger label, our model directly achieved biomedical event extraction without postprocessing.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i6e37804_fig34.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Here, <italic>W<sup>tri</sup></italic> and <italic>b<sup>tri</sup></italic> are the weight matrix and bias for trigger recognition, respectively. The probability output of the trigger softmax of the <italic>k</italic>th word is <italic>soft</italic><sub>k</sub>. <italic>W<sup>event</sup></italic> and <italic>b<sup>event</sup></italic> are the weight matrix and bias for event extraction, respectively. The number of words of the <italic>i</italic>th trigger and the <italic>j</italic>th candidate argument are <italic>i</italic><sub>m</sub> and <italic>j</italic><sub>n</sub>, respectively. <italic>O</italic><sub>k</sub> is the source information vector of the <italic>k</italic>th word.</p>
          <p>Comparing equation 10 with equation 11, we found that it only realizes the joint extraction of triggers and event arguments using equation 10; therefore, it needs postprocessing to seek out the tuple of events. However, owing to the aggregation of trigger distribution information, we can discover which event argument belongs to the trigger of step <italic>t</italic> using equation 11.</p>
        </sec>
      </sec>
      <sec>
        <title>Joint Dice Loss</title>
        <p>Owing to the sparse data of the biomedical event corpus and the imbalance between positive and negative examples, the cross-entropy or negative log-likelihood loss function causes a large discrepancy between precision and recall. To alleviate this problem, we propose using a joint weight self-adjusting Dice loss function [<xref ref-type="bibr" rid="ref41">41</xref>], as follows:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i6e37804_fig35.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Here, <italic>N</italic> is the number of sentences in the corpus; <italic>n</italic><sub>p</sub>, <italic>t</italic><sub>p</sub>, and <italic>e</italic><sub>p</sub> are the number of tokens, extracted trigger candidates, and arguments of the <italic>l</italic>th sentence, λ is for smoothing purposes, β is a hyperparameter to adjust the loss, and θ is the model’s parameters that should be trained.</p>
      </sec>
      <sec>
        <title>Training</title>
        <p>The CPJE model was trained over several epochs. In each epoch, we divided the training set into batches, each containing a list of sentences and each sentence containing a set of tokens of variable lengths. One batch was processed at each time step.</p>
        <p>For each batch, we first ran the information extraction layer to generate the context representation <inline-graphic xlink:href="medinform_v10i6e37804_fig36.png" xlink:type="simple" mimetype="image"/>  and the attention representation with syntactic information <inline-graphic xlink:href="medinform_v10i6e37804_fig37.png" xlink:type="simple" mimetype="image"/>. Then, we combined <italic>L</italic> and <italic>M</italic> as the input of LSTM to generate source information <italic>O</italic>. In the end, we ran the joint extraction layer to compute gradients for overall network output (triggers and events). After that, we back propagated the errors from the output to the input through CPJE and updated all the network parameters. The overall procedure of the CPJE model is summarized in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>.</p>
        <boxed-text id="box2" position="float">
          <title>The training procedure of the conditional probability joint extraction model.</title>
          <p>
            <bold>Input</bold>
          </p>
          <list list-type="order">
            <list-item>
              <p>Sequence of tokens {<italic>w</italic><sub>1</sub>,...,<italic>w</italic><sub>n</sub>} along with corresponding event labels</p>
            </list-item>
            <list-item>
              <p>Set of edges {<italic>e</italic><sub>12</sub>,...,<italic>e</italic><sub>ij</sub>,...,<italic>e</italic><sub>mn</sub>} for each corresponding token</p>
            </list-item>
          </list>
          <p>
            <bold>Output</bold>
          </p>
          <p>All parameters in the conditional probability joint extraction model</p>
          <list list-type="order">
            <list-item>
              <p>For each epoch do</p>
            </list-item>
            <list-item>
              <p>For each batch do</p>
            </list-item>
            <list-item>
              <p>Generate <italic>L</italic> and <italic>M</italic> by information extraction layer via equations 3 and 9</p>
            </list-item>
            <list-item>
              <p>Concatenate <italic>L</italic> and <italic>M</italic> as <italic>T</italic></p>
            </list-item>
            <list-item>
              <p>Generate the source information <italic>O</italic>={<italic>o</italic><sub>1</sub>,...,<italic>o</italic><sub>n</sub>} by long short-term memory</p>
            </list-item>
            <list-item>
              <p>Compute the trigger scores <italic>y</italic><sub>t</sub> and the trigger softmax probability <italic>soft</italic> by the “SoftMax Trigger” block in the joint extraction layer via the first equation in equation 11</p>
            </list-item>
            <list-item>
              <p>Fuse <italic>O</italic> and <italic>soft</italic> via the second and third equations in equation 11</p>
            </list-item>
            <list-item>
              <p>Compute the event scores <italic>y</italic><sub>e</sub> by the “SoftMax Event” block in the joint extraction layer via the fourth equation in equation 11</p>
            </list-item>
            <list-item>
              <p>Update the parameters by the back propagation algorithm</p>
            </list-item>
            <list-item>
              <p>End for</p>
            </list-item>
            <list-item>
              <p>End for</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Data</title>
        <p>Our experiments were conducted mainly on the MLEE corpus [<xref ref-type="bibr" rid="ref6">6</xref>], as shown in <xref ref-type="table" rid="table2">Table 2</xref>, which has 4 categories containing 19 predefined trigger subcategories. There are 262 documents with 56,588 words in total, with 8291 entities and 6677 events. From <xref ref-type="table" rid="table2">Table 2</xref>, we note that the number of anatomical-level events is higher than the number of molecular-level and planned-level events, although general biomedical events dominate overall. Overall, 18% (1202/6677) of the total events involved either direct or indirect arguments at both the molecular and anatomical levels. From <xref ref-type="table" rid="table1">Table 1</xref>, we find that the arguments of regulation, positive regulation, negative regulation, and planned process events may be not only entities but also other events; therefore, these events are nested events, which account for approximately 54.87% (3664/6677) of all events.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The multilevel event extraction statistical information.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="170"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Item</td>
                <td colspan="2">Training, n (%)</td>
                <td colspan="2">Development, n (%)</td>
                <td colspan="2">Test, n (%)</td>
                <td>Total, N</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Document</td>
                <td colspan="2">131 (50)</td>
                <td colspan="2">44 (16.8)</td>
                <td colspan="2">87 (33.2)</td>
                <td>262</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Sentence</td>
                <td colspan="2">1271 (48.73)</td>
                <td colspan="2">457 (17.52)</td>
                <td colspan="2">880 (33.74)</td>
                <td>2608</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Word</td>
                <td colspan="2">27,875 (49.26)</td>
                <td colspan="2">9610 (16.98)</td>
                <td colspan="2">19,103 (33.76)</td>
                <td>56,588</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Entity</td>
                <td colspan="2">4147 (50.02)</td>
                <td colspan="2">1431 (17.26)</td>
                <td colspan="2">2713 (32.72)</td>
                <td>8291</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Event</bold>
                </td>
                <td colspan="2">3296 (49.36)</td>
                <td colspan="2">1175 (17.6)</td>
                <td colspan="2">2206 (33.04)</td>
                <td>6677</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Anatomical</td>
                <td colspan="2">810 (48.36)</td>
                <td colspan="2">269 (16.06)</td>
                <td colspan="2">596 (35.58)</td>
                <td colspan="2">1675</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Molecular</td>
                <td colspan="2">340 (48.2)</td>
                <td colspan="2">125 (17.7)</td>
                <td colspan="2">240 (34.0)</td>
                <td colspan="2">705</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>General</td>
                <td colspan="2">1851 (50.66)</td>
                <td colspan="2">627 (17.16)</td>
                <td colspan="2">1176 (32.18)</td>
                <td colspan="2">3654</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Planned</td>
                <td colspan="2">295 (45.9)</td>
                <td colspan="2">154 (24.0)</td>
                <td colspan="2">194 (30.2)</td>
                <td colspan="2">643</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>In addition, we verified our experiment using the BioNLP-ST 2011 GE corpus [<xref ref-type="bibr" rid="ref7">7</xref>]. As shown in <xref ref-type="table" rid="table3">Table 3</xref>, the BioNLP-ST 2011 GE corpus defines 9 biomedical event types. It is noted that a <italic>binding</italic> event probably requires &#62;1 protein entity as its theme argument, and a <italic>regulation</italic> event is likely to require a protein or an event as its theme argument and needs a protein or an event as its cause argument. Overall, 37.20% (9288/24,967) of events (regulation, positive regulation, and negative regulation) led to a nested structure.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>The primary event types and core argument roles in the BioNLP-STa 2011 GEb corpus and the important statistical information of the GE corpus.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="300"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Event types and BioNLP-ST 2011 GE items</td>
                <td>Core arguments</td>
                <td>Values, N</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Event type</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gene expression</td>
                <td>Theme (protein)</td>
                <td>N/A<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Transcription</td>
                <td>Theme (protein)</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Protein catabolism</td>
                <td>Theme (protein)</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Phosphorylation</td>
                <td>Theme (protein)</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Localization</td>
                <td>Theme (protein)</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Binding</td>
                <td>Theme (protein)<sup>d</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Regulation</td>
                <td>Theme (protein or event) and cause (protein or event)</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Positive regulation</td>
                <td>Theme (protein or event) and cause (protein or event)</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Negative regulation</td>
                <td>Theme (protein or event) and cause (protein or event)</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>BioNLP-ST 2011 GE corpus statistics</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Document</td>
                <td>N/A</td>
                <td>1224</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word</td>
                <td>N/A</td>
                <td>348,908</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Entity</td>
                <td>N/A</td>
                <td>21,616</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event</td>
                <td>N/A</td>
                <td>24,967</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>BioNLP-ST: BioNLP shared task.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>GE: Genia event.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>Represents the number of arguments &#62;1.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Hyperparameter Setting</title>
        <p>For the hyperparameter settings of our experiment, we used 768 dimensions for the BioBERT embeddings and set 64 dimensions for the POS-tagging and entity label embeddings. We applied a 1-layer BiLSTM with 128 hidden units and used a 2-layer GCN and 2-head self-attention for our model. The dropout rate was 0.3, the learning rate was 0.01, and the optimization function was stochastic gradient descent (SGD). The training of our CPJE model was based on the operating system of Ubuntu 20.04, using PyTorch (version 1.9.0) and Python (version 3.8.8). The graphics processing unit was an NVIDIA TITAN Xp with 12 GB of memory.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overall Performance on MLEE</title>
        <p>We compare our performance with the baselines shown in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref>.</p>
        <boxed-text id="box3" position="float">
          <title>Baselines for performance.</title>
          <p>
            <bold>EventMine</bold>
          </p>
          <p>Pyysalo et al [<xref ref-type="bibr" rid="ref6">6</xref>] applied a pipeline-based event extraction system, mainly relying on support vector machine classifiers to implement trigger recognition and event extraction.</p>
          <p>
            <bold>Semisupervised learning</bold>
          </p>
          <p>This is a semisupervised learning framework proposed by Zhou et al [<xref ref-type="bibr" rid="ref30">30</xref>], which can use unannotated data to extract biomedical events.</p>
          <p>
            <bold>Convolutional neural network</bold>
          </p>
          <p>Wang et al [<xref ref-type="bibr" rid="ref3">3</xref>] used convolutional neural networks and multiple distributed feature vector representations to achieve event extraction tasks.</p>
          <p>
            <bold>mdBLSTM (bidirectional long short-term memory with a multilevel attention mechanism and dependency-based word embeddings)</bold>
          </p>
          <p>He et al [<xref ref-type="bibr" rid="ref5">5</xref>] proposed a bidirectional long short-term memory neural network based on a multilevel attention mechanism and dependency-based word embeddings to extract biomedical events.</p>
          <p>
            <bold>Reinforcement learning+knowledge bases</bold>
          </p>
          <p>Zhao et al [<xref ref-type="bibr" rid="ref32">32</xref>] proposed a framework of reinforcement learning with external biomedical knowledge bases for extracting biomedical events.</p>
          <p>
            <bold>DeepEventMine</bold>
          </p>
          <p>Trieu et al [<xref ref-type="bibr" rid="ref35">35</xref>] proposed an end-to-end neural model. It uses a multioverlapping directed acyclic graph to detect nested biomedical entities, triggers, roles, and events.</p>
          <p>
            <bold>Hierarchical artificial neural network</bold>
          </p>
          <p>Zhao et al [<xref ref-type="bibr" rid="ref36">36</xref>] proposed a 2-level modeling method for document-level joint biomedical event extraction.</p>
        </boxed-text>
        <p><xref ref-type="table" rid="table4">Table 4</xref> illustrates the overall performance against the state-of-the-art methods with gold standard entities. As seen in this table, our CPJE model achieved only a slight improvement in the trigger recognition task. For the event extraction task, the F<sub>1</sub> score was significantly better than the other baselines. Notably, the gap between the precision and recall of our model was much smaller than that of the mdBLSTM (bidirectional long short-term memory with a multilevel attention mechanism and dependency-based word embeddings) model, and the precision was much better than that of the RL+KBs model. This indicates that our model had a better effect on reducing cascading errors than the pipeline models. In addition, the hierarchical artificial neural network (HANN) model was also a joint extraction model; however, its performance is disappointing. This is because the HANN model focuses on extracting document-level biomedical events, which contain many cross-sentence entities, triggers, and events. However, other models aim to extract sentence-level events; therefore, the performance of these models is better than that of the HANN model.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Overall performance on multilevel event extraction compared with the state-of-the-art methods with gold standard entities.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="130"/>
            <col width="110"/>
            <col width="150"/>
            <col width="0"/>
            <col width="140"/>
            <col width="130"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>Method</td>
                <td colspan="4">Trigger recognition (%)</td>
                <td colspan="3">Event extraction (%)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>EventMine<sup>a</sup></td>
                <td>70.79</td>
                <td>81.69</td>
                <td>75.84</td>
                <td colspan="2">62.28</td>
                <td>49.56</td>
                <td>55.20</td>
              </tr>
              <tr valign="top">
                <td>SSL<sup>a,b</sup></td>
                <td>72.17</td>
                <td>82.26</td>
                <td>76.89</td>
                <td colspan="2">55.76</td>
                <td>59.16</td>
                <td>57.41</td>
              </tr>
              <tr valign="top">
                <td>CNN<sup>a,c</sup></td>
                <td>80.92</td>
                <td>75.23</td>
                <td>77.97</td>
                <td colspan="2">60.56</td>
                <td>56.23</td>
                <td>58.31</td>
              </tr>
              <tr valign="top">
                <td>mdBLSTM<sup>a,d</sup></td>
                <td>82.79</td>
                <td>76.56</td>
                <td>79.55</td>
                <td colspan="2">90.24</td>
                <td>44.50</td>
                <td>59.61</td>
              </tr>
              <tr valign="top">
                <td>RL<sup>e</sup>+KBs<sup>a,f</sup></td>
                <td>N/A<sup>g</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td colspan="2">63.78</td>
                <td>56.81</td>
                <td>60.09</td>
              </tr>
              <tr valign="top">
                <td>DeepEventMine<sup>h</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td colspan="2">69.91</td>
                <td>55.49</td>
                <td>61.87</td>
              </tr>
              <tr valign="top">
                <td>HANN<sup>h,i</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td colspan="2">63.91</td>
                <td>56.08</td>
                <td>59.74</td>
              </tr>
              <tr valign="top">
                <td>Our model<sup>h</sup></td>
                <td>82.20</td>
                <td>78.25</td>
                <td>80.18</td>
                <td colspan="2">72.26</td>
                <td>55.23</td>
                <td>62.80<sup>j</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Pipeline model.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>SSL: semisupervised learning.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>CNN: convolutional neural network.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>mdBLSTM: bidirectional long short-term memory with a multilevel attention mechanism and dependency-based word embeddings.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>RL: reinforcement learning.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>KB: knowledge base.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table4fn8">
              <p><sup>h</sup>Joint model.</p>
            </fn>
            <fn id="table4fn9">
              <p><sup>i</sup>HANN: hierarchical artificial neural network.</p>
            </fn>
            <fn id="table4fn10">
              <p><sup>j</sup>The best value compared with baselines.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>The Performance for Nested Events on MLEE</title>
        <p>To evaluate the effectiveness of our model for improving the nested biomedical event extraction, we split the test set into 2 parts (<italic>simple</italic> and <italic>nested</italic>). <italic>Simple</italic> means that 1 event only regards the entities as its arguments; <italic>nested</italic> means that one of the arguments of an event may be another event. In general, nested events are present in regulation, positive regulation, negative regulation, and planned process events.</p>
        <p><xref ref-type="table" rid="table5">Table 5</xref> illustrates the performance (F<sub>1</sub> scores) of the CNN model [<xref ref-type="bibr" rid="ref3">3</xref>], the RL+KBs model [<xref ref-type="bibr" rid="ref32">32</xref>], the DeepEventMine [<xref ref-type="bibr" rid="ref35">35</xref>] model, the HANN [<xref ref-type="bibr" rid="ref36">36</xref>] model, and our model in the trigger recognition and event extraction subtasks. In the <italic>simple</italic> and <italic>nested</italic> data of triggers, our framework was 0.44% and 1.25% better than the CNN model, which demonstrates that our model can improve the performance of trigger recognition. However, there is no significant difference between simple and nested triggers. In the <italic>nested</italic> data of events, our model was 6.97% higher than the CNN model, 2.57% higher than the RL+KBs model, 9.53% higher than the DeepEventMine model, and 15.8% higher than the HANN model, which illustrates that our CPJE model of using a gate GCN and an attention mechanism helps to enhance the performance of extracting nested events.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>The F<sub>1</sub> score performance on simple events, nested events, and all events on the multilevel event extraction corpus.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="400"/>
            <col width="0"/>
            <col width="210"/>
            <col width="0"/>
            <col width="210"/>
            <col width="0"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Subtask and model</td>
                <td colspan="2">Simple (%)</td>
                <td colspan="2">Nested (%)</td>
                <td>All (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Trigger</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CNN<sup>a</sup></td>
                <td colspan="2">79.52</td>
                <td colspan="2">78.80</td>
                <td colspan="2">78.52</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RL<sup>b</sup>+KBs<sup>c</sup></td>
                <td colspan="2">N/A<sup>d</sup></td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DeepEventMine</td>
                <td colspan="2">N/A</td>
                <td colspan="2">79.12</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>HANN<sup>e</sup></td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Our model</td>
                <td colspan="2">79.96<sup>f</sup></td>
                <td colspan="2">80.05<sup>f</sup></td>
                <td colspan="2">80.18<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Event</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CNN</td>
                <td colspan="2">61.33</td>
                <td colspan="2">54.29</td>
                <td colspan="2">58.87</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RL+KBs</td>
                <td colspan="2">N/A</td>
                <td colspan="2">58.69</td>
                <td colspan="2">60.09</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DeepEventMine</td>
                <td colspan="2">N/A</td>
                <td colspan="2">51.73</td>
                <td colspan="2">61.87</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>HANN</td>
                <td colspan="2">77.08<sup>f</sup></td>
                <td colspan="2">45.46</td>
                <td colspan="2">59.74</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Our model</td>
                <td colspan="2">64.85</td>
                <td colspan="2">61.26<sup>f</sup></td>
                <td colspan="2">62.80<sup>f</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>CNN: convolutional neural network.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>RL: reinforcement learning.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>KB: knowledge base.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup>HANN: hierarchical artificial neural network.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup>The best value compared with other models.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>The Performance for All Events on MLEE</title>
        <p>To illustrate the impact of our framework on different events in more detail, <xref ref-type="table" rid="table6">Table 6</xref> presents the event extraction performance for all event types. From this table, we obtain the best extraction performance for dephosphorylation events and the worst performance for transcription events. In addition, the catabolic events had the best extraction precision, and the phosphorylation events had the best extraction recall rate.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>The extraction performance for different events on multilevel event extraction corpus.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="450"/>
            <col width="190"/>
            <col width="180"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Events</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F<sub>1</sub> score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Cell proliferation</td>
                <td>62.50</td>
                <td>58.57</td>
                <td>60.47</td>
              </tr>
              <tr valign="top">
                <td>Development</td>
                <td>51.82</td>
                <td>66.43</td>
                <td>58.22</td>
              </tr>
              <tr valign="top">
                <td>Blood vessel development</td>
                <td>90.42</td>
                <td>72.66</td>
                <td>80.57</td>
              </tr>
              <tr valign="top">
                <td>Growth</td>
                <td>78.02</td>
                <td>50.58</td>
                <td>61.37</td>
              </tr>
              <tr valign="top">
                <td>Death</td>
                <td>79.12</td>
                <td>44.32</td>
                <td>56.81</td>
              </tr>
              <tr valign="top">
                <td>Breakdown</td>
                <td>71.30</td>
                <td>48.30</td>
                <td>57.59</td>
              </tr>
              <tr valign="top">
                <td>Remodeling</td>
                <td>85.71</td>
                <td>58.32</td>
                <td>69.41</td>
              </tr>
              <tr valign="top">
                <td>Synthesis</td>
                <td>48.00</td>
                <td>20.30</td>
                <td>28.53</td>
              </tr>
              <tr valign="top">
                <td>Gene expression</td>
                <td>74.72</td>
                <td>82.42</td>
                <td>78.38</td>
              </tr>
              <tr valign="top">
                <td>Transcription</td>
                <td>16.67</td>
                <td>33.33</td>
                <td>22.22</td>
              </tr>
              <tr valign="top">
                <td>Catabolism</td>
                <td>100.00</td>
                <td>50.00</td>
                <td>66.67</td>
              </tr>
              <tr valign="top">
                <td>Phosphorylation</td>
                <td>90.00</td>
                <td>100.00</td>
                <td>94.74</td>
              </tr>
              <tr valign="top">
                <td>Dephosphorylation</td>
                <td>100.00</td>
                <td>100.00</td>
                <td>100.00</td>
              </tr>
              <tr valign="top">
                <td>Localization</td>
                <td>76.86</td>
                <td>49.98</td>
                <td>60.57</td>
              </tr>
              <tr valign="top">
                <td>Binding</td>
                <td>74.52</td>
                <td>51.23</td>
                <td>60.71</td>
              </tr>
              <tr valign="top">
                <td>Regulation</td>
                <td>63.82</td>
                <td>51.49</td>
                <td>56.99</td>
              </tr>
              <tr valign="top">
                <td>Positive regulation</td>
                <td>78.28</td>
                <td>50.66</td>
                <td>61.51</td>
              </tr>
              <tr valign="top">
                <td>Negative regulation</td>
                <td>64.35</td>
                <td>54.69</td>
                <td>59.13</td>
              </tr>
              <tr valign="top">
                <td>Planned process</td>
                <td>69.57</td>
                <td>51.86</td>
                <td>59.42</td>
              </tr>
              <tr valign="top">
                <td>All</td>
                <td>64.85</td>
                <td>61.26</td>
                <td>62.80</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Overall Performance on BioNLP-ST 2011 GE</title>
        <p>To make our results more persuasive, we extended our experiment to the BioNLP-ST 2011 GE corpus. We compared our event extraction results with those of previous systems using the same corpus, as shown in <xref ref-type="table" rid="table7">Table 7</xref>. Among them, the Turku Event Extraction System (TEES) [<xref ref-type="bibr" rid="ref42">42</xref>], EventMine [<xref ref-type="bibr" rid="ref6">6</xref>], and stacked generalization [<xref ref-type="bibr" rid="ref25">25</xref>] systems are based on support vector machines with designed features. The TEES-CNNs [<xref ref-type="bibr" rid="ref4">4</xref>] are CNNs integrated into the TEES system to extract relations and events. The DeepEventMine [<xref ref-type="bibr" rid="ref35">35</xref>] is based on bidirectional transformers and an overlapping directed acyclic graph to jointly extract biomedical events. The HANN [<xref ref-type="bibr" rid="ref36">36</xref>] model relies on the GCN and hypergraph to obtain local and global contexts. The KB-driven tree LSTM [<xref ref-type="bibr" rid="ref33">33</xref>] depends on KB concept embedding to improve the pretrained distributed word representations. The Graph Edge-conditioned Attention Networks with Science BERT (GEANet-SciBERT) [<xref ref-type="bibr" rid="ref34">34</xref>] adopts a hierarchical graph representation encoded by graph edge-conditioned attention networks to incorporate domain knowledge from the Unified Medical Language System into a pretrained language model. <xref ref-type="table" rid="table7">Table 7</xref> illustrates that except for the DeepEventMine, our approach outperformed all previous methods.</p>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>The performance of biomedical event extraction on the BioNLP shared task 2011 Genia event corpus.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="200"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Method and event type</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F<sub>1</sub> score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>TEES<sup>a,b</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total<sup>c</sup></td>
                <td>57.65</td>
                <td>49.56</td>
                <td>53.30</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>EventMine<sup>a</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total</td>
                <td>63.48</td>
                <td>53.35</td>
                <td>57.98</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Stacked generalization<sup>a</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total</td>
                <td>66.46</td>
                <td>48.96</td>
                <td>56.38</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>TEES-CNNs<sup>a,d</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total</td>
                <td>69.45</td>
                <td>49.94</td>
                <td>58.07</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>HANN<sup>e,f</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total</td>
                <td>71.73</td>
                <td>53.21</td>
                <td>61.10</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>KB<sup>g</sup>-driven tree LSTM<sup>e,h</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Simple total<sup>i</sup></td>
                <td>85.95</td>
                <td>72.62</td>
                <td>78.73</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Binding</td>
                <td>53.16</td>
                <td>37.68</td>
                <td>44.10</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Regulation total<sup>j</sup></td>
                <td>55.73</td>
                <td>41.73</td>
                <td>47.72</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total</td>
                <td>67.10</td>
                <td>52.14</td>
                <td>58.65</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>GEANet-SciBERT<sup>e,k</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Regulation total</td>
                <td>55.21</td>
                <td>47.23</td>
                <td>50.91</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total</td>
                <td>64.61</td>
                <td>56.11</td>
                <td>60.06</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>DeepEventMine<sup>e</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Regulation total</td>
                <td>62.36</td>
                <td>51.88</td>
                <td>56.64<sup>l</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total</td>
                <td>76.28</td>
                <td>55.06</td>
                <td>63.96<sup>l</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Our model<sup>e</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Simple total</td>
                <td>82.23</td>
                <td>78.88</td>
                <td>80.52</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Binding</td>
                <td>55.12</td>
                <td>37.48</td>
                <td>44.62</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Regulation total</td>
                <td>57.82</td>
                <td>46.39</td>
                <td>51.48</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Event total</td>
                <td>72.62</td>
                <td>53.33</td>
                <td>61.50</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>Pipeline model.</p>
            </fn>
            <fn id="table7fn2">
              <p><sup>b</sup>TEES: Turku Event Extraction System.</p>
            </fn>
            <fn id="table7fn3">
              <p><sup>c</sup>Represents the overall performance on the test set.</p>
            </fn>
            <fn id="table7fn4">
              <p><sup>d</sup>CNN: convolutional neural network.</p>
            </fn>
            <fn id="table7fn5">
              <p><sup>e</sup>Joint model.</p>
            </fn>
            <fn id="table7fn6">
              <p><sup>f</sup>HANN: hierarchical artificial neural network.</p>
            </fn>
            <fn id="table7fn7">
              <p><sup>g</sup>KB: knowledge base.</p>
            </fn>
            <fn id="table7fn8">
              <p><sup>h</sup>LSTM: long short-term memory.</p>
            </fn>
            <fn id="table7fn9">
              <p><sup>i</sup>Represents the overall performance for simple events on the test set.</p>
            </fn>
            <fn id="table7fn10">
              <p><sup>j</sup>Represents the overall performance for nested events on the test set (including regulation, positive regulation, and negative regulation subevents).</p>
            </fn>
            <fn id="table7fn11">
              <p><sup>k</sup>GEANet-SciBERT: Graph Edge-conditioned Attention Networks with Science BERT.</p>
            </fn>
            <fn id="table7fn12">
              <p><sup>l</sup>The best value compared with other models.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The KB-driven tree LSTM and GEANet-SciBERT both draw on the KB to enhance the semantic representation of words to improve the extraction performance of nested (regulation) events. However, the KB-driven tree LSTM only leverages traditional static word embedding, which cannot deeply integrate information from the KB; thus, its performance on nested events is unsatisfactory.</p>
        <p>Unlike the KB-driven tree LSTM method, the GEANet-SciBERT model uses a specialized medical KB and scientific information to enrich the dynamic semantic representation of Bidirectional Encoder Representation from Transformers (BERT) and enhances the capability of inferring nested events via a novel GNN. Thus, the F<sub>1</sub> scores for the nested event extraction were significantly boosted.</p>
        <p>Interestingly, the DeepEventMine had an outstanding performance for extracting nested biomedical events on BioNLP-ST 2011 GE but had a poor performance on MLEE. There are three reasons for this fact. First, the DeepEventMine model jointly learns 4 biomedical information tasks (entity detection, trigger detection, role detection, and event detection), which can share more biomedical features and knowledge during model training. Second, the DeepEventMine model uses a more complex graph structure (multiple overlapping directed acyclic graphs) to obtain rich syntactic information. Finally, the BioNLP-ST 2011 GE data set size is larger than that of the MLEE data set; thus, the DeepEventMine model can be fully trained on a large corpus and enhance the performance of extracting nested events.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>In this section, we will study and discuss the performance of our CPJE model using the MLEE corpus.</p>
      <sec>
        <title>Ablation Study</title>
        <sec>
          <title>The Impact of the BiLSTM</title>
          <p>Although the output of BioBERT contains rich semantic information, it has some noise impact on semantic information after concatenating POS embedding, entity embedding, and BioBERT embedding. In addition, the dimension of the BioBERT output is 768, and the total dimension after concatenation is even larger, which tends to cause a combinatorial explosion in the feature space. Therefore, we considered using a BiLSTM, which reduces the total dimension and integrates other information with the BioBERT information to obtain a richer semantic representation.</p>
          <p>If we remove the BiLSTM layer, the trigger recognition precision drops from 82.20% to 75.64%, and the trigger recognition F<sub>1</sub> score drops from 80.18% to 76.39%, which further affects the event extraction performance (the event extraction F<sub>1</sub> score falls from 62.80% to 58.02%).</p>
        </sec>
        <sec>
          <title>The Impact of Softmax Probability</title>
          <p>To evaluate the contribution of the softmax probability distribution after trigger prediction to the event extraction task, we used the traditional joint extraction method (as shown in equation 10), which only uses source information when extracting candidate trigger vectors and event argument vectors.</p>
          <p>If we only use the source information (soft trigger) for joint extraction, the event extraction task lacks the probability distribution information after trigger recognition, which results in a decline in the recall rate of the model and further affects the F<sub>1</sub> scores (the event extraction F<sub>1</sub> score drops from 62.80% to 60.09%). However, the overall result is still slightly higher than the pipeline baseline, which also reflects that joint extraction can eliminate cascading errors.</p>
        </sec>
        <sec>
          <title>The Impact of GCN</title>
          <p>We removed the syntactic structure to evaluate the importance of the GCN network; therefore, the GCN module was effectively disabled in our model. If the model lacks the GCN component, the performance of trigger recognition is slightly degraded (the trigger recognition F<sub>1</sub> score falls from 80.18% to 78.78%), and the result of event extraction is significantly worse than that of the proposed model (the event extraction F<sub>1</sub> score falls from 62.80% to 58.40%).</p>
          <p>As the syntactic structure can provide significant potential information for event extraction, the GCN model can be aware of the direction of information flow in syntactic structures and capture these features effectively. Therefore, the GCN model is vital for event extraction.</p>
        </sec>
        <sec>
          <title>The Impact of Dice Loss</title>
          <p>In the face of an imbalance in biomedical corpora, we used the Dice loss function. To verify that the Dice loss function had a better effect on event extraction, we used the cross-entropy loss function for comparison.</p>
          <p>A significantly large number of negative examples in the data set indicates that easy-negative examples are extensive. A large number of straightforward examples overwhelmed the training, making the model unable to distinguish between positive and hard-negative examples. As the cross-entropy loss is accuracy oriented and each instance contributes equally to the loss function, the precision of the model increases (the event extraction precision rises from 72.26% to 89.26%), but the F<sub>1</sub> scores do not increase (the event extraction F<sub>1</sub> score drops from 62.60% to 60.30%). Dice loss is a soft version of the F<sub>1</sub> score—the harmonic mean of precision and recall. When the positive and negative examples in the data set are unbalanced, the Dice loss will reduce the focus on the easy-negative sample and increase the attention on positive and hard-negative samples, thereby balancing the precision and recall values and increasing the F<sub>1</sub> scores.</p>
        </sec>
      </sec>
      <sec>
        <title>Visualization</title>
        <p>To demonstrate the effectiveness of the attention-based gate GCN, we used the sentence “Effects of spironolactone on corneal allograft survival in the rat” in <xref rid="figure3" ref-type="fig">Figure 3</xref> as an example to illustrate the captured interaction features. From <xref rid="figure3" ref-type="fig">Figure 3</xref>B, we know this sentence contains 2 events: a <italic>regulation</italic> event caused by <italic>effects</italic> and a <italic>death</italic> event caused by <italic>survival</italic>. In addition, a death event is one of the arguments for the regulation event.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>An example of attention-based gate graph neural network effectiveness. (A) Row-wise heat map, where each row is an array of average scores of the 2 heads obtained from the multi-head attention mechanism. The darker the color, the higher the score and the stronger the interaction. (B) Dependency parsing result produced by Stanford CoreNLP and the golden relationships between event triggers and arguments, where yellow boxes represent entity type, and the blue boxes represent event type.</p>
          </caption>
          <graphic xlink:href="medinform_v10i6e37804_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>As we can see in <xref rid="figure3" ref-type="fig">Figure 3</xref>A, the <italic>effects</italic> row has moderately strong links with <italic>Effects</italic> (self), spironolactone (its argument), and <italic>survival</italic> (its argument and another event). Meanwhile, the <italic>survival</italic> row has strong links with <italic>survival</italic> (self), <italic>effects</italic> (another event), and <italic>corneal allograft</italic> (its argument). In addition, the words <italic>rat</italic> and <italic>on</italic> also have strong connections with <italic>survival</italic>, which means that the syntactic dependency information generated by parsing is propagated through the GCN.</p>
      </sec>
      <sec>
        <title>Case Study</title>
        <sec>
          <title>Overview</title>
          <p>Our framework has not achieved state-of-the-art results for the BioNLP-ST 2011 GE corpus. However, the performance of extracting nested biomedical events is satisfactory, particularly in the MLEE corpus. To more intuitively demonstrate the performance of our model in extracting nested biomedical events, we analyzed 3 examples of nested events selected from the MLEE test set to study the strengths and weaknesses of our model compared with the CNN [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        </sec>
        <sec>
          <title>Case 1</title>
          <p>As shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>, case 1 is a simple nested event, where the role type of event argument is only the <italic>theme</italic>. It is a nested event; however, both the CNN and our model obtained correct event extraction results. This is because this sentence does not have complete sentence components, and perhaps, it is only a part of a complete sentence. The simpler the sentence structure is, the easier it is for the model to extract practical features. Therefore, the extraction performance for such nested events is generally favorable.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Case study for a simple nested event on the multilevel event extraction corpus. CNN: convolutional neural network.</p>
            </caption>
            <graphic xlink:href="medinform_v10i6e37804_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Case 2</title>
          <p>Case 2 is a general nested event whose sentence component is complete, and the role types of event arguments are <italic>theme</italic> and <italic>cause</italic>. As shown in <xref rid="figure5" ref-type="fig">Figure 5</xref>, the CNN model detects all correct event triggers but cannot detect the correct event arguments. The CNN model is a pipeline approach that considers trigger recognition and argument detection tasks in a cascade rather than a parallel relationship. In general, they first input the text into the CNN model to identify the triggers in the sentence. Then, they construct &#60;trigger, entity&#62; or &#60;trigger, trigger&#62; candidate pairs and input them into the CNN model again to detect the arguments. Finally, rule-based or machine learning-based methods are used to postprocess triggers and arguments to construct complete biomedical events. If there is an error in some of these steps, it will directly affect the performance of event extraction. However, our joint method regards trigger recognition and argument detection as parallel tasks that can provide valid information. Thus, we trained both tasks jointly with one model, and errors could only be generated during the model training.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Case study for a common nested event on multilevel event extraction corpus. CNN: convolutional neural network.</p>
            </caption>
            <graphic xlink:href="medinform_v10i6e37804_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Case 3</title>
          <p>Case 3 is a cross-sentence nested event, as shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>. From this example, we can determine what needs to be improved. As multiple events are nested in each other, and some of these events are not in the same sentence, this prevents the model from extracting all events efficiently and accurately. Compared with the CNN model, although our model can identify the <italic>positive regulation</italic> event triggered by <italic>resulting</italic>, it is not in the same clause as the <italic>development</italic> event triggered by <italic>create</italic>, which causes the <italic>positive regulation</italic> event to lack an event argument.</p>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Case study for a cross-sentence nested event on multilevel event extraction corpus. CNN: convolutional neural network.</p>
            </caption>
            <graphic xlink:href="medinform_v10i6e37804_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, a CPJE framework based on a multi-head attention graph CNN is proposed to achieve biomedical event extraction tasks. The cascading errors between the 2 subtasks were reduced because of the use of the joint extraction framework. With the help of the attention-based gate GCN, syntactic dependency information and the interrelations between triggers and related entities were effectively learned; thus, the extraction performance of nested biomedical events improved. The Dice loss replaced the cross-entropy loss, which weakened the negative impact of the imbalanced data set. Overall, the model obtained the best F<sub>1</sub> score in the MLEE biomedical event extraction corpus and achieved favorable performance on the BioNLP-ST 2011 GE corpus. In the future, we will consider integrating external resource knowledge to allow the model to learn richer information and improve the performance of cross-sentence nested events.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representation From Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BiLSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BioBERT</term>
          <def>
            <p>Biomedical Bidirectional Encoder Representation From Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BioNLP</term>
          <def>
            <p>biomedical natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">BioNLP-ST</term>
          <def>
            <p>biomedical natural language processing shared task</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">CPJE</term>
          <def>
            <p>conditional probability joint extraction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">GCN</term>
          <def>
            <p>graph convolutional network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">GE</term>
          <def>
            <p>Genia event</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">GEANet-SciBERT</term>
          <def>
            <p>Graph Edge-conditioned Attention Networks with Science BERT</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">GNN</term>
          <def>
            <p>graph neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">HANN</term>
          <def>
            <p>hierarchical artificial neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">KB</term>
          <def>
            <p>knowledge base</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">mdBLSTM</term>
          <def>
            <p>bidirectional long short-term memory with a multilevel attention mechanism and dependency-based word embeddings</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">MLEE</term>
          <def>
            <p>multilevel event extraction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">POS</term>
          <def>
            <p>parts of speech</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">RL</term>
          <def>
            <p>reinforcement learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">SGD</term>
          <def>
            <p>stochastic gradient descent</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">TEES</term>
          <def>
            <p>Turku Event Extraction System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was funded by grants from the National Natural Science Foundation of China (number 62072070).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>YW proposed the study of biomedical event extraction, implemented and verified the effectiveness of the joint extraction framework, and wrote the first draft. JW put forward constructive suggestions for revising this draft. H Lu read the final manuscript and provided some useful suggestions. H Lin read and approved the final manuscript. BX read and approved the final manuscript. YZ helped to review and revise the draft. SKB helped revise the draft.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Kulick</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Winters</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>PS</given-names>
            </name>
          </person-group>
          <article-title>Simple algorithms for complex relation extraction with applications to biomedical IE</article-title>
          <source>Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics</source>
          <year>2005</year>
          <conf-name>ACL '05</conf-name>
          <conf-date>June 25-30, 2005</conf-date>
          <conf-loc>Ann Arbor, MI, USA</conf-loc>
          <fpage>491</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.3115/1219840.1219901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kilicoglu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bergler</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Effective bio-event extraction using trigger words and syntactic dependencies</article-title>
          <source>Comput Intell</source>
          <year>2011</year>
          <month>11</month>
          <day>27</day>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>583</fpage>
          <lpage>609</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1467-8640.2011.00401.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A multiple distributed representation method based on neural network for biomedical event extraction</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>12</month>
          <day>20</day>
          <volume>17</volume>
          <issue>Suppl 3</issue>
          <fpage>171</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0563-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0563-9</pub-id>
          <pub-id pub-id-type="medline">29297321</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0563-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC5751641</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Björne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakoski</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Biomedical event extraction using convolutional neural networks and dependency parsing</article-title>
          <source>Proceedings of the BioNLP 2018 workshop</source>
          <year>2018</year>
          <conf-name>BioNLP '18</conf-name>
          <conf-date>July 19, 2018</conf-date>
          <conf-loc>Melbourne, Australia</conf-loc>
          <fpage>98</fpage>
          <lpage>108</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w18-2311</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Multi-level attention based BLSTM neural network for biomedical event extraction</article-title>
          <source>IEICE Trans Inf Syst</source>
          <year>2019</year>
          <volume>E102.D</volume>
          <issue>9</issue>
          <fpage>1842</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1587/transinf.2018edp7268</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pyysalo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ohta</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Event extraction across multiple levels of biological organization</article-title>
          <source>Bioinformatics</source>
          <year>2012</year>
          <month>09</month>
          <day>15</day>
          <volume>28</volume>
          <issue>18</issue>
          <fpage>i575</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22962484"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/bts407</pub-id>
          <pub-id pub-id-type="medline">22962484</pub-id>
          <pub-id pub-id-type="pii">bts407</pub-id>
          <pub-id pub-id-type="pmcid">PMC3436834</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Takagi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yonezawa</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Overview of Genia event task in BioNLP shared task 2011</article-title>
          <source>Proceedings of BioNLP Shared Task 2011 Workshop</source>
          <year>2011</year>
          <conf-name>BioNLP '11</conf-name>
          <conf-date>June 24, 2011</conf-date>
          <conf-loc>Portland, OR, USA</conf-loc>
          <fpage>7</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.1186/1471-2105-13-s11-s1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Joint entity and relation extraction based on a hybrid neural network</article-title>
          <source>Neurocomputing</source>
          <year>2017</year>
          <month>09</month>
          <day>27</day>
          <volume>257</volume>
          <fpage>59</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1016/j.neucom.2016.12.075</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>ZX</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>ZH</given-names>
            </name>
          </person-group>
          <article-title>Distant supervision relation extraction with intra-bag and inter-bag attentions</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2019</year>
          <conf-name>NAACL '19</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN, USA</conf-loc>
          <fpage>2810</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.48550/arXiv.1904.00143</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Reinforcement learning for relation classification from noisy data</article-title>
          <source>Proceedings of the 32nd AAAI Conference on Artificial Intelligence</source>
          <year>2018</year>
          <month>2</month>
          <conf-name>AAAI '18</conf-name>
          <conf-date>Feb 2-7, 2018</conf-date>
          <conf-loc>New Orleans, LA, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>PH</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>WY</given-names>
            </name>
          </person-group>
          <article-title>Graphrel: Modeling text as relational graphs for joint entity and relation extraction</article-title>
          <source>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2019</year>
          <conf-name>ACL '19</conf-name>
          <conf-date>July 28-August 2, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <fpage>1409</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/p19-1136</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Attention guided graph convolutional networks for relation extraction</article-title>
          <source>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2019</year>
          <conf-name>ACL '19</conf-name>
          <conf-date>July 28-August 2, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <fpage>241</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/p19-1024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Joint event extraction via structured prediction with global features</article-title>
          <source>Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics</source>
          <year>2013</year>
          <month>8</month>
          <conf-name>ACL '13</conf-name>
          <conf-date>August 4-9, 2013</conf-date>
          <conf-loc>Sofia, Bulgaria</conf-loc>
          <fpage>73</fpage>
          <lpage>82</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Keith</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Handler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pinkham</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Magliozzi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McDuffie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Identifying civilians killed by police with distantly supervised entity-event extraction</article-title>
          <source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2017</year>
          <month>9</month>
          <conf-name>EMNLP '17</conf-name>
          <conf-date>September 7-8, 2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <fpage>1547</fpage>
          <lpage>57</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d17-1163</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reichart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Barzilay</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Multi-event extraction guided by global constraints</article-title>
          <source>Proceedings of the 2012 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2012</year>
          <month>6</month>
          <conf-name>NAACL '12</conf-name>
          <conf-date>June 3-8, 2012</conf-date>
          <conf-loc>Montreal, Canada</conf-loc>
          <fpage>70</fpage>
          <lpage>9</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Automatic event extraction with structured preference modeling</article-title>
          <source>Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2012</year>
          <month>7</month>
          <conf-name>ACL '12</conf-name>
          <conf-date>July 8-14, 2012</conf-date>
          <conf-loc>Jeju Island, Korea</conf-loc>
          <fpage>835</fpage>
          <lpage>44</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sha</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sui</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Jointly extracting event triggers and arguments by dependency-bridge RNN and tensor-based argument interaction</article-title>
          <source>Proceedings of the 32nd AAAI Conference on Artificial Intelligence</source>
          <year>2018</year>
          <conf-name>AAAI '18</conf-name>
          <conf-date>February 2-7, 2018</conf-date>
          <conf-loc>New Orleans, LA, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Grishman</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Modeling skip-grams for event detection with convolutional neural networks</article-title>
          <source>Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2016</year>
          <conf-name>EMNLP '16</conf-name>
          <conf-date>November 1-5, 2016</conf-date>
          <conf-loc>Austin, TX, USA</conf-loc>
          <fpage>886</fpage>
          <lpage>91</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d16-1085</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Jointly multiple events extraction via attention-based graph information aggregation</article-title>
          <source>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2018</year>
          <conf-name>EMNLP '18</conf-name>
          <conf-date>October 31-November 4, 2018</conf-date>
          <conf-loc>Brussels, Belgium</conf-loc>
          <fpage>1247</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d18-1156</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Ohta</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Pyysalo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kano</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Overview of BioNLP'09 shared task on event extraction</article-title>
          <source>Proceedings of the Workshop on Current Trends in Biomedical Natural Language Processing: Shared Task</source>
          <year>2009</year>
          <conf-name>BioNLP '09</conf-name>
          <conf-date>June 05, 2009</conf-date>
          <conf-loc>Boulder, CO, USA</conf-loc>
          <fpage>1</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.3115/1572340.1572342</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bossy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Golik</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ratkovic</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Bessières</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nédellec</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>BioNLP shared task 2013 - an overview of the bacteria biotope task</article-title>
          <source>Proceedings of the BioNLP Shared Task 2013 Workshop</source>
          <year>2013</year>
          <conf-name>BioNLP '13</conf-name>
          <conf-date>August 09, 2013</conf-date>
          <conf-loc>Sofia, Bulgaria</conf-loc>
          <fpage>161</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/W16-3002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Saetre</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Event extraction with complex event classification using rich features</article-title>
          <source>J Bioinform Comput Biol</source>
          <year>2010</year>
          <month>02</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>131</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1142/s0219720010004586</pub-id>
          <pub-id pub-id-type="medline">20183879</pub-id>
          <pub-id pub-id-type="pii">S0219720010004586</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Boosting automatic event extraction from the literature using domain adaptation and coreference resolution</article-title>
          <source>Bioinformatics</source>
          <year>2012</year>
          <month>07</month>
          <day>01</day>
          <volume>28</volume>
          <issue>13</issue>
          <fpage>1759</fpage>
          <lpage>65</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22539668"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/bts237</pub-id>
          <pub-id pub-id-type="medline">22539668</pub-id>
          <pub-id pub-id-type="pii">bts237</pub-id>
          <pub-id pub-id-type="pmcid">PMC3381963</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Björne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakoski</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>TEES 2.1: automated annotation scheme learning in the BioNLP 2013 shared task</article-title>
          <source>Proceedings of the BioNLP Shared Task 2013 Workshop</source>
          <year>2013</year>
          <conf-name>BioNLP '13</conf-name>
          <conf-date>August 9, 2013</conf-date>
          <conf-loc>Sofia, Bulgaria</conf-loc>
          <fpage>16</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w16-3009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Majumder</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ekbal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Naskar</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Biomolecular event extraction using a stacked generalization-based classifier</article-title>
          <source>Proceedings of the 13th International Conference on Natural Language Processing</source>
          <year>2016</year>
          <conf-name>ICNLP '16</conf-name>
          <conf-date>December 17-20, 2016</conf-date>
          <conf-loc>Varanasi, India</conf-loc>
          <fpage>55</fpage>
          <lpage>64</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Riedel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Robust biomedical event extraction with dual decomposition and minimal domain adaptation</article-title>
          <source>Proceedings of BioNLP Shared Task 2011 Workshop</source>
          <year>2011</year>
          <conf-name>BioNLP '11</conf-name>
          <conf-date>June 24, 2011</conf-date>
          <conf-loc>Portland, OR, USA</conf-loc>
          <fpage>46</fpage>
          <lpage>50</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Venugopal</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gogate</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Relieving the Computational Bottleneck: joint inference for event extraction with high-dimensional features</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2014</year>
          <conf-name>EMNLP '14</conf-name>
          <conf-date>October 25-29, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <fpage>831</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1090</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>DQ</given-names>
            </name>
            <name name-style="western">
              <surname>Verspoor</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>From POS tagging to dependency parsing for biomedical event extraction</article-title>
          <source>BMC Bioinformatics</source>
          <year>2019</year>
          <month>02</month>
          <day>12</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>72</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-2604-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12859-019-2604-0</pub-id>
          <pub-id pub-id-type="medline">30755172</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12859-019-2604-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6373122</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A semi-supervised learning framework for biomedical event extraction based on hidden topics</article-title>
          <source>Artif Intell Med</source>
          <year>2015</year>
          <month>05</month>
          <volume>64</volume>
          <issue>1</issue>
          <fpage>51</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2015.03.004</pub-id>
          <pub-id pub-id-type="medline">25863986</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(15)00027-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marcu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Knight</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Daumé III</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Biomedical event extraction using abstract meaning representation</article-title>
          <source>Proceedings of the BioNLP 2017 workshop</source>
          <year>2017</year>
          <conf-name>BioNLP '17</conf-name>
          <conf-date>August 04, 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <fpage>126</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w17-2315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>KC</given-names>
            </name>
          </person-group>
          <article-title>Context awareness and embedding for biomedical event extraction</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>01</month>
          <day>15</day>
          <volume>36</volume>
          <issue>2</issue>
          <fpage>637</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz607</pub-id>
          <pub-id pub-id-type="medline">31392318</pub-id>
          <pub-id pub-id-type="pii">5544930</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>A novel method for multiple biomedical events extraction with reinforcement learning and knowledge bases</article-title>
          <source>Proceedings of the 2020 IEEE International Conference on Bioinformatics and Biomedicine</source>
          <year>2020</year>
          <conf-name>BIBM '20</conf-name>
          <conf-date>December 16-19, 2020</conf-date>
          <conf-loc>Seoul, South Korea</conf-loc>
          <fpage>402</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1109/bibm49941.2020.9313214</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Biomedical event extraction based on knowledge-driven tree-LSTM</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2019</year>
          <conf-name>NAACL '19</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN, USA</conf-loc>
          <fpage>1421</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Biomedical event extraction with hierarchical knowledge graphs</article-title>
          <source>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2020</year>
          <conf-name>EMNLP '20</conf-name>
          <conf-date>November 16-20, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>1277</fpage>
          <lpage>85</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.findings-emnlp.114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Trieu</surname>
              <given-names>HL</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Duong</surname>
              <given-names>KN</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>DeepEventMine: end-to-end neural nested event extraction from biomedical texts</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>12</month>
          <day>08</day>
          <volume>36</volume>
          <issue>19</issue>
          <fpage>4910</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33141147"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa540</pub-id>
          <pub-id pub-id-type="medline">33141147</pub-id>
          <pub-id pub-id-type="pii">5858975</pub-id>
          <pub-id pub-id-type="pmcid">PMC7750964</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>A novel joint biomedical event extraction framework via two-level modeling of documents</article-title>
          <source>Inf Sci</source>
          <year>2021</year>
          <month>03</month>
          <volume>550</volume>
          <fpage>27</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ins.2020.10.047</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramponi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van der Goot</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lombardo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Plank</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Biomedical event extraction as sequence labeling</article-title>
          <source>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2020</year>
          <conf-name>EMNLP '20</conf-name>
          <conf-date>November 16-20, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>5357</fpage>
          <lpage>67</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.431</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Accurate unlexicalized parsing</article-title>
          <source>Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics</source>
          <year>2003</year>
          <conf-name>ACL '03</conf-name>
          <conf-date>July 7-12, 2003</conf-date>
          <conf-loc>Sapporo, Japan</conf-loc>
          <fpage>423</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.3115/1075096.1075150</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>Ł</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>Proceedings of the Annual Conference on Advances in Neural Information Processing Systems</source>
          <year>2017</year>
          <conf-name>NIPS '17</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Dice loss for data-imbalanced NLP tasks</article-title>
          <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2020</year>
          <conf-name>ACL '20</conf-name>
          <conf-date>July 5-10, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>465</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.45</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Björne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakoski</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Generalizing biomedical event extraction</article-title>
          <source>Proceedings of BioNLP Shared Task 2011 Workshop</source>
          <year>2011</year>
          <conf-name>BioNLP '11</conf-name>
          <conf-date>June 24, 2011</conf-date>
          <conf-loc>Portland, OR, USA</conf-loc>
          <fpage>183</fpage>
          <lpage>91</lpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
