<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i8e38052</article-id>
      <article-id pub-id-type="pmid">35969463</article-id>
      <article-id pub-id-type="doi">10.2196/38052</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Exploiting Intersentence Information for Better Question-Driven Abstractive Summarization: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhao</surname>
            <given-names>Di</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sun</surname>
            <given-names>Cong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Xin</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8800-912X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Jian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>School of Computer Science and Technology</institution>
            <institution>Dalian University of Technology</institution>
            <addr-line>No 2 Linggong Road</addr-line>
            <addr-line>Dalian, 116023</addr-line>
            <country>China</country>
            <phone>86 13604119266</phone>
            <email>wangjian@dlut.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4656-7446</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Bo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5453-978X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>Hongfei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0872-7688</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Bo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6933-922X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Zhihao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6186-2024</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Computer Science and Technology</institution>
        <institution>Dalian University of Technology</institution>
        <addr-line>Dalian</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jian Wang <email>wangjian@dlut.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>8</issue>
      <elocation-id>e38052</elocation-id>
      <history>
        <date date-type="received">
          <day>17</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>26</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>10</day>
          <month>6</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Xin Wang, Jian Wang, Bo Xu, Hongfei Lin, Bo Zhang, Zhihao Yang. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 15.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/8/e38052" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Question-driven summarization has become a practical and accurate approach to summarizing the source document. The generated summary should be concise and consistent with the concerned question, and thus, it could be regarded as the answer to the nonfactoid question. Existing methods do not fully exploit question information over documents and dependencies across sentences. Besides, most existing summarization evaluation tools like recall-oriented understudy for gisting evaluation (ROUGE) calculate N-gram overlaps between the generated summary and the reference summary while neglecting the factual consistency problem.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This paper proposes a novel question-driven abstractive summarization model based on transformer, including a two-step attention mechanism and an overall integration mechanism, which can generate concise and consistent summaries for nonfactoid question answering.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Specifically, the two-step attention mechanism is proposed to exploit the mutual information both of question to context and sentence over other sentences. We further introduced an overall integration mechanism and a novel pointer network for information integration. We conducted a question-answering task to evaluate the factual consistency between the generated summary and the reference summary.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The experimental results of question-driven summarization on the PubMedQA data set showed that our model achieved ROUGE-1, ROUGE-2, and ROUGE-L measures of 36.01, 15.59, and 30.22, respectively, which is superior to the state-of-the-art methods with a gain of 0.79 (absolute) in the ROUGE-2 score. The question-answering task demonstrates that the generated summaries of our model have better factual consistency. Our method achieved 94.2% accuracy and a 77.57% F1 score.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our proposed question-driven summarization model effectively exploits the mutual information among the question, document, and summary to generate concise and consistent summaries.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>question-driven abstractive summarization</kwd>
        <kwd>transformer</kwd>
        <kwd>multi-head attention</kwd>
        <kwd>pointer network</kwd>
        <kwd>question answering</kwd>
        <kwd>factual consistency</kwd>
        <kwd>algorithm</kwd>
        <kwd>validation</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Automatic text summarization of natural language aims to summarize the source document to generate a concise and informative description for helping people efficiently and quickly capture the main idea [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. In the biomedical domain, question-driven answer summarization can be particularly useful for people whether they have a biomedical background or not because the generated summary only covers the key information with respect to a specific question and filters out the explanation part [<xref ref-type="bibr" rid="ref3">3</xref>]. It is different from a factoid question-answering (QA) [<xref ref-type="bibr" rid="ref4">4</xref>] system. The answer of factoid QA is a phrase or a sentence according to the question, but users prefer the detailed answer including more information to the accurate answer. Summaries for nonfactoid questions [<xref ref-type="bibr" rid="ref5">5</xref>] should be semantically consistent and identical with the context. PubMedQA [<xref ref-type="bibr" rid="ref6">6</xref>] is a novel biomedical nonfactoid QA data set collected from PubMed articles in which the title is a question and can be answered by yes or no. Some related studies [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] treat this QA data set as a summarization task and take the conclusion part of the abstract as the answer summary.</p>
      <p>Early works put emphasis on query-based summarization approaches [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>] in which the aim is to extract the sentences relevant to the given query. However, these methods are typically based on semantic relevance from query to context and neglect mutual information at the sentence level, which is helpful for the reasoning or inference process in question-driven summarization. These traditional extractive summarization methods are mainly based on information retrieval methods to select sentences that heavily rely on feature engineering, and the resulting performance is restricted by the pipeline [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Though extractive summarization is more grammatical and coherent, the extractive sentences fail to have a logical connection. In contrast to extractive methods, abstractive methods produce summaries at the word level based on semantic comprehension [<xref ref-type="bibr" rid="ref8">8</xref>]. Consequently, question-driven abstractive answer summarization is studied to generate the concise and salient short answer, which is also informative for answering the question.</p>
      <p>To tackle question-driven abstractive summarization, the answer summary should be highly related to the concerned question. Existing studies [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] often concentrate on processing the mutual information between the question and document. However, though some sentences are not strongly related to the question, they further explain the central entity in question and affect the expression of the context. Mutual information among answer sentences is underused. Furthermore, it is hard for the recurrent neural network (RNN)–based model to capture the information of long sentences. Existing studies model the sentences separately, which hinders the interaction among sentences. To this end, we propose a novel transformer-based model [<xref ref-type="bibr" rid="ref15">15</xref>] named Trans-Att that incorporates a two-step attention mechanism to enhance the mutual information both of question to context and sentence over other sentences. A novel multi-view pointer-generator network is proposed to create a condensed and concise summary to better use the question and context information.</p>
      <p>Furthermore, a common problem in the practical application of abstractive summarization models is the factual inconsistency [<xref ref-type="bibr" rid="ref16">16</xref>]. This refers to the phenomenon that the model produces a summary that sometimes distorts and fabricates the facts. Recent studies point out that up to 30% of the generated summaries contain such factual inconsistencies [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. One main reason is that most existing summarization evaluation tools calculate N-gram overlaps between the generated summary and the reference [<xref ref-type="bibr" rid="ref16">16</xref>]. Though some models make higher scores in token-level metrics like recall-oriented understudy for gisting evaluation (ROUGE) [<xref ref-type="bibr" rid="ref18">18</xref>], the generated summaries still lack factual correctness. Thus, human evaluation is still the primary method for evaluating the factual consistency. In question-driven answer summarization, generated summaries should be consistent with the context semantically. Wang et al [<xref ref-type="bibr" rid="ref19">19</xref>] and Durmus et al [<xref ref-type="bibr" rid="ref20">20</xref>] propose the QA-based factual consistency evaluation metrics QAGS and FEQA separately. They first generate a set of questions about the summary and then use a QA model to answer these questions for evaluation. Because of the characteristics of the PubMedQA data set, the questions are general questions, and they can be answered by yes or no. We use the summaries as the context for the QA task to evaluate the factual consistency.</p>
      <p>In this paper, a novel question-driven abstractive summarization model based on transformer is proposed, namely Trans-Att, that incorporates a two-step attention mechanism and an overall integration mechanism to summarize the document with respect to the nonfactoid questions. Concretely, the two-step attention mechanism can learn richer structural dependencies among sentences and the relevance of the question and the document. The overall integration mechanism integrates the question, the document, and the correlative summary to generate a summary representation, which allows the model to use the comprehensive information. A novel multi-view pointer network is then proposed by integrating transformer and pointer-generator networks [<xref ref-type="bibr" rid="ref21">21</xref>] to facilitate copying words from the question or the document to better use the question and context information. Finally, besides question-driven abstractive summarization evaluated by ROUGE, we also assess the model performance by a QA task to evaluate whether the generated summaries are factually consistent with the source document with regard to the question. The effectiveness of this model is empirically validated on the text summarization and QA tasks, and the model achieves state-of-the-art performance on the PubMedQA data set.</p>
      <p>The following are our main contributions. First, the novel architecture Trans-Att uses a two-step attention mechanism for better integrating the information in both question to context and sentence over other sentences.</p>
      <p>Second, we propose a novel multi-view pointer network to generate tokens through overall integration, which integrates the attentive question, the attentive document, and the correlative summary to generate a summary representation.</p>
      <p>Finally, besides ROUGE for automatically evaluating the summarized answers, we conduct a QA task to evaluate the factual consistency.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Question-Driven Abstractive Summarization</title>
        <p>Automatic text summarization is a challenging task in the natural language processing field. It aims to generate simple and coherent essays that comprehensively and accurately reflect the central content of an original document. It can be categorized into two approaches: extractive and abstractive methods. The former method selects a few relevant sentences from the original text, while the latter needs to rephrase and generate a new sentence in which some words are not necessarily present in the original text. In this paper, we focus on abstractive summarization for its potential of summarizing the text more coherently and logically.</p>
        <p>Question-driven summarization is intended to summarize the original document in terms of a specific question, which is different from query-based summarization. In query-based summarization, the query is often a word or a phrase referring to a particular entity [<xref ref-type="bibr" rid="ref11">11</xref>]. In contrast, a question may contain several entities and a specific semantic meaning, which requires the model to have the reasoning or inference ability to identify the corresponding semantic contents in question-driven summarization [<xref ref-type="bibr" rid="ref8">8</xref>]. Early query-based summarization methods heavily rely on feature engineering including query-dependent features and query-independent features. The former includes named entity matching and semantic sentence matching, and the latter includes term frequency–inverse document frequency and stop word penalty [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Recently, some abstractive sequence-to-sequence neural networks have been proposed to generate summaries in regard to the given query [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Some recent works have developed a new method for question-driven summarization [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] in nonfactoid QA that requires much reasoning and an inference process. However, these methods only model the relation between the question and each sentence, and neglect the mutual information among sentences.</p>
      </sec>
      <sec>
        <title>Problem Formulation</title>
        <p>For the text summarization task, formally, assume that we have a question <italic>q</italic> = {<italic>q</italic><sub>1</sub>, ..., <italic>q<sub>m</sub></italic>} with m words and a source document <inline-graphic xlink:href="medinform_v10i8e38052_fig4.png" xlink:type="simple" mimetype="image"/> containing <italic>l<sup>s</sup></italic> sentences that have <italic>n<sup>s</sup></italic> words at most. The task is to generate an answer summary <italic>y</italic> = {<italic>y</italic><sub>1</sub>, ..., <italic>y<sub>n</sub></italic>} containing <italic>n</italic> words. The training goal is to maximize the probability <italic>p</italic>(<italic>y</italic>&#124;<italic>q</italic>, <italic>d</italic>). The overall architecture of our transformer-based question-driven abstractive answer summarization model is depicted in <xref rid="figure1" ref-type="fig">Figure 1</xref>, which consists of three main components: (1) two-step attention mechanism, (2) overall integration mechanism, and (3) multi-view pointer network for generation.</p>
        <p>For the QA task, given a question <italic>q</italic> and an answer summary <italic>y</italic>, the model should generate an answer <italic>a</italic> = {0,1} indicating yes or no to this question conditioned on the document. We adopted BioBERT [<xref ref-type="bibr" rid="ref23">23</xref>] as our model to evaluate the factual consistency, which is initialized with bidirectional encoder representations from transformers (BERT) [<xref ref-type="bibr" rid="ref24">24</xref>] and further pretrained on large-scale biomedical corpora.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of our model.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e38052_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Encoder</title>
        <sec>
          <title>Question Encoder</title>
          <p>Let <inline-graphic xlink:href="medinform_v10i8e38052_fig5.png" xlink:type="simple" mimetype="image"/> denote the token embedding indicating the meaning of each token <italic>q<sub>i</sub></italic>. A special positional encoding <italic>pe<sub>i</sub></italic> indicates the position of each token within the question sequence. The input of the question encoder <italic>I<sup>q</sup></italic> is a sequence of embeddings.</p>
          <p>A transformer layer is used to encode the question. It reads the question <italic>q</italic> = {<italic>q</italic><sub>1</sub>, ..., <italic>q<sub>m</sub></italic>} and computes a hidden representation <inline-graphic xlink:href="medinform_v10i8e38052_fig6.png" xlink:type="simple" mimetype="image"/>, where <italic>N<sub>m</sub></italic> denotes the length of the question and <italic>d</italic> is the dimension of the vector. To get a fixed length question representation, <italic>H<sup>q</sup></italic> is then converted to a vector <inline-graphic xlink:href="medinform_v10i8e38052_fig7.png" xlink:type="simple" mimetype="image"/> by adding all token representations and normalizing it by question length.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </sec>
        <sec>
          <title>Sentence Encoder</title>
          <p>Each document is composed of several sentences. Given a document context <inline-graphic xlink:href="medinform_v10i8e38052_fig4.png" xlink:type="simple" mimetype="image"/>, the input of the sentence encoder is the sentences fed one by one. We used sentence position embedding to indicate the order of sentences.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <inline-graphic xlink:href="medinform_v10i8e38052_fig10.png" xlink:type="simple" mimetype="image"/> is the word embedding of <italic>w<sub>i,j</sub></italic>, which is the same word embedding as <inline-graphic xlink:href="medinform_v10i8e38052_fig11.png" xlink:type="simple" mimetype="image"/>; the position embedding of the token is represented as <inline-graphic xlink:href="medinform_v10i8e38052_fig12.png" xlink:type="simple" mimetype="image"/>, and <inline-graphic xlink:href="medinform_v10i8e38052_fig13.png" xlink:type="simple" mimetype="image"/> denotes the sentence position embedding of <inline-graphic xlink:href="medinform_v10i8e38052_fig14.png" xlink:type="simple" mimetype="image"/>.</p>
          <p><italic>I<sup>s</sup></italic> is then fed into a transformer encoder to represent the sentence as a sequence of hidden vectors by:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>The hidden representation of a document is represented as <inline-graphic xlink:href="medinform_v10i8e38052_fig16.png" xlink:type="simple" mimetype="image"/> and a sentence vector <inline-graphic xlink:href="medinform_v10i8e38052_fig17.png" xlink:type="simple" mimetype="image"/>, where <italic>N<sup>s</sup></italic> = <italic>l<sup>s</sup></italic> × <italic>n<sup>s</sup></italic>.</p>
        </sec>
      </sec>
      <sec>
        <title>Two-step Attention Mechanism</title>
        <sec>
          <title>Intersentence Attention</title>
          <p>Inspired by Liu and Lapata [<xref ref-type="bibr" rid="ref25">25</xref>], we used an intersentence attention mechanism to model the dependencies across multiple sentences, where each sentence can attend to other sentences. We used a weighted-pooling operation to obtain a fixed-length sentence representation so that the diversity of each sentence representation is increased. Through a <italic>multi-head pooling mechanism</italic> [<xref ref-type="bibr" rid="ref25">25</xref>], each token can attend to other tokens by calculating weight distributions. Sentences can be encoded flexibly in different subspaces.</p>
          <p>The output representation <inline-graphic xlink:href="medinform_v10i8e38052_fig18.png" xlink:type="simple" mimetype="image"/> of the last transformer encoder layer for token <italic>w<sub>i</sub></italic><sub>,</sub><italic><sub>j</sub></italic> is denoted as <italic>x<sub>i</sub></italic><sub>,</sub><italic><sub>j</sub></italic> as the input. For each sentence <inline-graphic xlink:href="medinform_v10i8e38052_fig18.png" xlink:type="simple" mimetype="image"/> and for head <italic>z</italic> ∈ {1, ..., <italic>n<sub>head</sub></italic>}, we first conducted a linear transformation to obtain the attention scores <inline-graphic xlink:href="medinform_v10i8e38052_fig19.png" xlink:type="simple" mimetype="image"/> and value vectors <inline-graphic xlink:href="medinform_v10i8e38052_fig20.png" xlink:type="simple" mimetype="image"/>. The probability distribution <inline-graphic xlink:href="medinform_v10i8e38052_fig21.png" xlink:type="simple" mimetype="image"/> was then calculated within the sentence.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig22.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <inline-graphic xlink:href="medinform_v10i8e38052_fig23.png" xlink:type="simple" mimetype="image"/> and <inline-graphic xlink:href="medinform_v10i8e38052_fig24.png" xlink:type="simple" mimetype="image"/> are weights. <italic>d<sub>head</sub></italic> = <italic>d</italic> / <italic>n<sub>head</sub></italic> is the dimension of each head.</p>
          <p>Based on the probability distributions and value vectors, we conducted a weighted summation followed by another linear formation and layer normalization. Different vector <inline-graphic xlink:href="medinform_v10i8e38052_fig25.png" xlink:type="simple" mimetype="image"/> encodes sentences in a different subspace.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig26.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <inline-graphic xlink:href="medinform_v10i8e38052_fig27.png" xlink:type="simple" mimetype="image"/> is the weight. Because of the flexibility of combining multiple heads, each sentence has multiple attention distribution and focuses on different views of input.</p>
          <p>Dependencies among multiple sentences can be modeled by the intersentence attention that is similar to self-attention. Intersentence attention computes the distribution of attention so that each sentence attends to other sentences.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig28.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <inline-graphic xlink:href="medinform_v10i8e38052_fig29.png" xlink:type="simple" mimetype="image"/> are query, key, and value vectors, respectively. Through a self-attention calculation, <inline-graphic xlink:href="medinform_v10i8e38052_fig30.png" xlink:type="simple" mimetype="image"/> is obtained to represent the sentence vector that gathers the information of other sentences. l<sup>s</sup> is the number of input sentences.</p>
          <p>We then concatenate all context vectors and pass through a linear layer with weight <inline-graphic xlink:href="medinform_v10i8e38052_fig31.png" xlink:type="simple" mimetype="image"/> to update token representations by adding <italic>c<sub>i</sub></italic> to each token vector <italic>x<sub>i</sub></italic><sub>,</sub><italic><sub>j</sub></italic>. We then pass it through a two-layer multilayer perceptron, taking <italic>gelu</italic> as the activation function [<xref ref-type="bibr" rid="ref26">26</xref>]. Next, we pass the summation of <italic>x<sub>i</sub></italic><sub>,</sub><italic><sub>j</sub></italic> and <italic>g<sub>i</sub></italic><sub>,</sub><italic><sub>j</sub></italic> to a layer normalization. In this way, each sentence collects information from other sentences represented as <inline-graphic xlink:href="medinform_v10i8e38052_fig32.png" xlink:type="simple" mimetype="image"/>.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig33.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </sec>
        <sec>
          <title>Coattention</title>
          <p>Coattention is the second attention mechanism aimed at exploiting the pairwise mutual information between the question and the context.</p>
          <p>We further used an additive attention [<xref ref-type="bibr" rid="ref27">27</xref>] to obtain the distribution of document sentences that highly coincides with the question and then combines the question and question-related sentences to get their comprehensive representation <inline-graphic xlink:href="medinform_v10i8e38052_fig34.png" xlink:type="simple" mimetype="image"/> by:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig35.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>MLP</italic> is the same as mentioned before. <inline-graphic xlink:href="medinform_v10i8e38052_fig36.png" xlink:type="simple" mimetype="image"/> are trainable parameters.</p>
        </sec>
      </sec>
      <sec>
        <title>Integration Decoder</title>
        <p>When given the first <italic>t</italic> – 1 tokens in the summary <italic>y</italic><sub>1</sub>, ..., <italic>y<sub>n</sub></italic>, the integration decoder incorporates the question and the document into the summary through an overall integration mechanism. The purpose is to predict the representation of the <italic>t</italic>-th token and transmit it to the pointer network.</p>
        <sec>
          <title>Overall Integration</title>
          <p>Inspired by gated recurrent units [<xref ref-type="bibr" rid="ref28">28</xref>], we designed an <italic>integration</italic> gate (<italic>z</italic>) to integrate the question-document and summary, which enables summary tokens at different times to merge information in different levels. Multi-head attention is then used to capture the information in the fused representation, <inline-graphic xlink:href="medinform_v10i8e38052_fig37.png" xlink:type="simple" mimetype="image"/>, and obtain <italic>s<sup>y</sup></italic>, which is a correlative summary. <inline-graphic xlink:href="medinform_v10i8e38052_fig38.png" xlink:type="simple" mimetype="image"/> is the vector representation of the input summary.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig39.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>To reinforce the understanding of the question and document of the decoder, <italic>s<sup>y</sup></italic> is used to compute attention with the question and the document, and obtain representations <italic>s<sup>q</sup></italic> and <italic>s<sup>s</sup></italic>.</p>
          <disp-formula><italic>s<sup>q</sup></italic> = <italic>Multi-headAttention</italic> (<italic>s<sup>y</sup></italic>, <italic>H<sup>q</sup></italic>, <italic>H<sup>q</sup></italic>) <bold>(23)</bold>
          </disp-formula>
          <disp-formula><italic>s<sup>s</sup></italic> = <italic>Multi-headAttention</italic> (<italic>s<sup>y</sup></italic>, <italic>H<sup>s</sup>'</italic>, <italic>H<sup>s</sup>'</italic>) <bold>(24)</bold>
          </disp-formula>
          <p>Next, similar to equation 20, the predicted representation <italic>o<sup>y</sup></italic> is obtained to integrate the attentive question, the attentive document, and the correlative summary by using the <italic>integration</italic> gate.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig40.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where * is denoted as <italic><sup>q</sup></italic> or <italic><sup>s</sup></italic>.</p>
        </sec>
      </sec>
      <sec>
        <title>Multi-View Pointer Network</title>
        <p>To improve the probability of generating corresponding tokens from the question and the document, a novel multi-view pointer network is proposed based on multi-head attention as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Multi-view pointer network. H<sup>q</sup>: hidden representation of question; y: hidden representation of the input summary; H<sup>s</sup>: hidden representation of document.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e38052_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Question Tokens</title>
          <p>We computed the attention weights <italic>β<sup>q</sup></italic> through multiple attention weights in the multi-head attention.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig41.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>f<sub>β</sub></italic> denotes the function that extracts the multiple attention weights in the multi-head attention. <inline-graphic xlink:href="medinform_v10i8e38052_fig42.png" xlink:type="simple" mimetype="image"/> is the weight, where <italic>n<sup>head</sup></italic> is the number of heads. <italic>β<sup>q</sup></italic> can be treated as the probability distribution over the question words. It can be represented as <inline-graphic xlink:href="medinform_v10i8e38052_fig43.png" xlink:type="simple" mimetype="image"/>.</p>
        </sec>
        <sec>
          <title>Document Tokens</title>
          <p>The distribution of the document that is relevant to the question can serve as a global distribution over each decoding step. <italic>β<sup>s</sup></italic> can be calculated similarly to equation 27, which can be considered a local distribution at each decoding step. Thus, the distribution over the document can be calculated by:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig44.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig45.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </sec>
        <sec>
          <title>Vocabulary Tokens</title>
          <p>The predicted representation from the overall integration decoder is used to calculate the probability distribution <italic>p<sup>v</sup></italic> over the fixed vocabulary through a <italic>softmax</italic> layer; <italic>W<sup>v</sup></italic> is the weight from the word embeddings.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig46.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>The final probability distribution <italic>y<sub>t</sub></italic> to predict can be formulated from three aspects of word distributions as:</p>
          <disp-formula><italic>P</italic>(<italic>y<sub>t</sub></italic> &#124;<italic>q</italic>, <italic>d</italic>, <italic>y</italic> &#60; <italic>t</italic>) = <italic>softmax</italic> (<italic>W<sub>γ</sub>o<sup>y</sup></italic> + <italic>b<sub>γ</sub></italic>) ⋅ [<italic>p<sup>v</sup></italic>, <italic>p<sup>q</sup></italic>, <italic>p<sup>s</sup></italic>] <bold>(31)</bold>
          </disp-formula>
        </sec>
        <sec>
          <title>Loss Function</title>
          <p>The main training objective is to minimize the negative log likelihood between the reference summary and the predicted summary. Thus, Trans-Att can be trained by minimizing the objective.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e38052_fig47.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </sec>
      </sec>
      <sec>
        <title>Question-Answering Model</title>
        <p>BERT [<xref ref-type="bibr" rid="ref24">24</xref>] has already been used in QA tasks. We fine-tuned BioBERT [<xref ref-type="bibr" rid="ref23">23</xref>] as a baseline. We fed PubMedQA questions and corresponding texts that could be contexts, reference long answers, contexts and long answers, or generated summaries for comparison, separated by special [SEP] token, to the model. We took the special embedding [CLS] from the last layer and used a <italic>softmax</italic> function to predict the final label that could be yes or no. The general loss was trained by minimizing the cross-entropy between the predicted labels and the true label distribution.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Set</title>
        <p>We evaluated our model on the nonfactoid QA data set PubMedQA [<xref ref-type="bibr" rid="ref6">6</xref>]. PubMedQA is a novel biomedical data set aiming at answering academic questions and has substantial instances with some expert annotations. Each instance is composed of a question that is a general question, a context that is the structured abstract without its conclusion, a long answer that is the conclusion of the abstract in terms of the question, and a final answer yes/no for the general question that summarizes the conclusion and can be used for the QA task. The statistics of the PubMedQA data set are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>We adopted ROUGE-1, ROUGE-2, and ROUGE-L to automatically evaluate the summarized answers in the question-driven abstractive summarization task. The main metrics of the QA task are accuracy and macro-F1 under a reasoning-free setting in which the generated summary is added in the input.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Statistics of the PubMedQA data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Task data set</td>
                <td>Training, n</td>
                <td>Development, n</td>
                <td>Test, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>QA<sup>a</sup> pairs</td>
                <td>169,000</td>
                <td>21,000</td>
                <td>21,000</td>
              </tr>
              <tr valign="top">
                <td>Average question length (word count)</td>
                <td>16.3</td>
                <td>16.4</td>
                <td>16.3</td>
              </tr>
              <tr valign="top">
                <td>Average document length (word count)</td>
                <td>238</td>
                <td>238</td>
                <td>239</td>
              </tr>
              <tr valign="top">
                <td>Average summary length (word count)</td>
                <td>41.0</td>
                <td>41.0</td>
                <td>40.9</td>
              </tr>
              <tr valign="top">
                <td>Average number of sentences</td>
                <td>9.32</td>
                <td>9.31</td>
                <td>9.33</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>QA: question-answering.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Experimental Settings</title>
        <p>ParlAI [<xref ref-type="bibr" rid="ref29">29</xref>] was implemented in our model as the code framework. The dimensions of word embedding size and hidden size were both 256. The text was encoded by byte-pair encoding [<xref ref-type="bibr" rid="ref30">30</xref>], and the embedding matrix was initialized with fastText. Both encoder and decoder layers of transformer-based models were 5, with feed-forward hidden size 512 and attention head 4 for all layers. The optimizer was Adam [<xref ref-type="bibr" rid="ref31">31</xref>] with an initial learning rate of 0.0005. We also applied the inverse square root learning schedule over the 5k warm-up updates. The dropout rate was set to 0.2, and gradient clipping was used with a maximum gradient norm of 0.1. Label smoothing of the value 0.1 was used for summary generation. We used beam search in the generation process with beam size 2 and adopted 3-gram blocking.</p>
      </sec>
      <sec>
        <title>Comparative Methods</title>
        <p>We report the performance of our proposed model in comparison with several baselines and state-of-the-art methods based on different methodologies, including extractive summarization, abstractive summarization, query-based summarization, and question-driven abstractive summarization.</p>
        <p>Two unsupervised extractive methods were used. LEAD3 is a simple but effective extractive summarization baseline that concatenates the first two sentences and the last sentence without question information. Maximal marginal relevance is an information retrieval model used to calculate the similarity between the text and the researched document for extractive summarization.</p>
        <p>Three widely adopted abstractive methods were adopted for comparison. Sequence-to-sequence model with attention [<xref ref-type="bibr" rid="ref27">27</xref>] is a simple encoder-decoder model with attention based on RNN without respect to the question. Pointer-generator network [<xref ref-type="bibr" rid="ref21">21</xref>] is a hybrid pointer-generator architecture with coverage based on a neural sequence-to-sequence model for abstractive text summarization. Transformer [<xref ref-type="bibr" rid="ref15">15</xref>] implements the state-of-the-art encoder-decoder framework based on multi-head attention without access to the question.</p>
        <p>There were two query-based abstractive summarization methods used for comparison. The soft long short-term memory–based diversity attention model (SD<sub>2</sub>) [<xref ref-type="bibr" rid="ref10">10</xref>] adds a query attention mechanism to a sequence-to-sequence model. It learns to pay attention to different parts of the query at different time steps. Query-based summarization using neural networks (QS) [<xref ref-type="bibr" rid="ref11">11</xref>] incorporates question information into the pointer-generator network with the use of the vanilla attention mechanism.</p>
        <p>Finally, we implemented two of the latest question-driven answer summarization models for comparison. Hierarchical and sequential context modeling [<xref ref-type="bibr" rid="ref7">7</xref>] is a hierarchical compare-aggregate method used to integrate the interaction between the question and the document into final document representation at both the word level and sentence level. Multi-hop selective generator (MSG) [<xref ref-type="bibr" rid="ref8">8</xref>] models the relevance between question and sentences by leveraging a humanlike multi-hop reasoning process for question-driven summarization, in which the most related sentences are given higher weights.</p>
      </sec>
      <sec>
        <title>Experimental Results</title>
        <p>The experimental results of question-driven summarization in terms of ROUGE scores and QA with respect to accuracy and macro-F1 scores are presented in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref>. Both ROUGE scores and metrics of QA show that our model achieved competitive performance in comparison with state-of-the-art question-driven summarization methods.</p>
        <p>Compared with traditional text summarization, there was limited improvement for query-based summarization methods (SD<sub>2</sub> and QS), indicating that the question information was not sufficiently used. There was a noticeable margin, about 0.79 for ROUGE-2, higher than the current state-of-the-art model (MSG). This indicates that the model benefits from the information provided by mutual information between question and document, and among sentences. We noticed that the ROUGE-1 score of our model was lower than MSG. One possible explanation is that the length of the generated summary of MSG was longer than that of our model. Considering the characteristic of ROUGE-1 that measures the word overlap between the reference summary and the predicted summary, the longer summary has more possibility of generating words that appeared before.</p>
        <p>As for the QA result, we observed that if using the original answer summary, BioBERT achieves good enough scores. If the input answer summary can correctly answer the question, it is consistent with the original semantics. Thus, evaluating the factual consistency by a QA task is feasible. Suppose that we feed the context without long answer information to the model, which is under the reasoning-required setting; the result is comparatively lower because the reasoning and inference process is crucial in answering the question if the answer is not directly available. We treated the long answer as the summary, and its quality influenced the factual consistency. It was observed that there is still a big gap between the generated summary and the reference summary, which leaves room for improvement.</p>
        <p>Overall, the differences in accuracy were not significant, amounting to only narrow margins, because of the imbalanced distribution of labels (92.8% yes vs 7.2% no). The F1 score was significant and representative, and our model achieved the best F1 score of 77.57%. The results show that the extractive methods performed better than the abstractive methods. We speculate that extractive summarization approaches directly copy from the source context. However, it is worth noting that the extractive methods have an upper bound, and they barely exceed the performance when given the whole context. There is substantial potential for abstractive approaches. Future work should explore the reasoning ability of abstractive methods.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Comparison with related works of question-driven summarization task.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="160"/>
            <col width="170"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Methods</td>
                <td>Types</td>
                <td>With question</td>
                <td>ROUGE<sup>a</sup>-1 (%)</td>
                <td>ROUGE-2 (%)</td>
                <td>ROUGE-L (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>LEAD3</td>
                <td>Extractive</td>
                <td>No</td>
                <td>30.94</td>
                <td>9.79</td>
                <td>25.89</td>
              </tr>
              <tr valign="top">
                <td>MMR<sup>b</sup></td>
                <td>Extractive</td>
                <td>No</td>
                <td>29.69</td>
                <td>9.50</td>
                <td>24.10</td>
              </tr>
              <tr valign="top">
                <td>S2SA<sup>c</sup></td>
                <td>Abstractive</td>
                <td>No</td>
                <td>32.40</td>
                <td>11.00</td>
                <td>27.30</td>
              </tr>
              <tr valign="top">
                <td>PGN<sup>d</sup></td>
                <td>Abstractive</td>
                <td>No</td>
                <td>32.89</td>
                <td>11.51</td>
                <td>28.10</td>
              </tr>
              <tr valign="top">
                <td>Transformer</td>
                <td>Abstractive</td>
                <td>No</td>
                <td>32.38</td>
                <td>11.34</td>
                <td>26.32</td>
              </tr>
              <tr valign="top">
                <td>SD<sub>2</sub><sup>e</sup></td>
                <td>Abstractive</td>
                <td>Query based</td>
                <td>32.33</td>
                <td>10.52</td>
                <td>26.01</td>
              </tr>
              <tr valign="top">
                <td>QS<sup>f</sup></td>
                <td>Abstractive</td>
                <td>Query based</td>
                <td>32.60</td>
                <td>11.10</td>
                <td>26.70</td>
              </tr>
              <tr valign="top">
                <td>HSCM<sup>g</sup></td>
                <td>Extractive</td>
                <td>Question driven</td>
                <td>32.34</td>
                <td>10.07</td>
                <td>25.98</td>
              </tr>
              <tr valign="top">
                <td>MSG<sup>h</sup></td>
                <td>Abstractive</td>
                <td>Question driven</td>
                <td>
                  <italic>37.20</italic>
                  <sup>i</sup>
                </td>
                <td>14.80</td>
                <td>30.20</td>
              </tr>
              <tr valign="top">
                <td>Trans-Att (ours)</td>
                <td>Abstractive</td>
                <td>Question driven</td>
                <td>36.01</td>
                <td>
                  <italic>15.59</italic>
                </td>
                <td>
                  <italic>30.22</italic>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>ROUGE: recall-oriented understudy for gisting evaluation.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>MMR: maximal marginal relevance.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>S2SA: sequence-to-sequence model with attention.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>PGN: pointer-generator network.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>SD<sub>2</sub>: soft long short-term memory–based diversity attention model.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>QS: query-based summarization using neural networks.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>HSCM: hierarchical and sequential context modeling.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>MSG: multi-hop selective generator.</p>
            </fn>
            <fn id="table2fn9">
              <p><sup>i</sup>Italics indicate the best result.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison with related work for question-answering task.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="300"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Methods</td>
                <td>Accuracy (%)</td>
                <td>F1 (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>LEAD3</td>
                <td>93.80</td>
                <td>67.06</td>
              </tr>
              <tr valign="top">
                <td>MMR<sup>a</sup></td>
                <td>
                  <italic>94.85</italic>
                  <sup>b</sup>
                </td>
                <td>75.69</td>
              </tr>
              <tr valign="top">
                <td>S2SA<sup>c</sup></td>
                <td>91.89</td>
                <td>63.81</td>
              </tr>
              <tr valign="top">
                <td>PGN<sup>d</sup></td>
                <td>91.93</td>
                <td>64.42</td>
              </tr>
              <tr valign="top">
                <td>Transformer</td>
                <td>94.18</td>
                <td>69.59</td>
              </tr>
              <tr valign="top">
                <td>SD<sub>2</sub><sup>e</sup></td>
                <td>94.34</td>
                <td>69.30</td>
              </tr>
              <tr valign="top">
                <td>HSCM<sup>f</sup></td>
                <td>93.78</td>
                <td>76.48</td>
              </tr>
              <tr valign="top">
                <td>MSG<sup>g</sup></td>
                <td>93.68</td>
                <td>73.27</td>
              </tr>
              <tr valign="top">
                <td>Trans-Att (ours)</td>
                <td>94.20</td>
                <td>
                  <italic>77.57</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>Majority</td>
                <td>92.76</td>
                <td>48.12</td>
              </tr>
              <tr valign="top">
                <td>Context</td>
                <td>96.50</td>
                <td>84.65</td>
              </tr>
              <tr valign="top">
                <td>Long answer</td>
                <td>99.04</td>
                <td>96.18</td>
              </tr>
              <tr valign="top">
                <td>Context + long answer</td>
                <td>99.20</td>
                <td>96.86</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>MMR: maximal marginal relevance.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Italics indicate the best result.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>S2SA: sequence-to-sequence model with attention.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>PGN: pointer-generator network.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>SD<sub>2</sub>: soft long short-term memory–based diversity attention model.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>HSCM: hierarchical and sequential context modeling.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>MSG: multi-hop selective generator.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Ablation Study</title>
        <p>To examine the contributions of our proposed modules, namely, intersentence attention, coattention, overall integration, and multi-view pointer network, we ran an ablation study. The experimental results are shown in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
        <p>Overall, all the modules contributed to the final performance to some extent. The accuracy score was not significant compared with the F1 score because of the imbalanced distribution of labels. When the coattention was discarded, the performance of the model dropped substantially, which indicates that it plays a more important role in exploiting the pairwise mutual information between the question and the document sentences. Besides, applying intersentence attention also improved the performance, which indicates that it is not enough to only consider the question-related information. Interrelation among sentences is also worth paying attention to. The decrease in F1 was the most significant, which demonstrates the effects of the two-step attention mechanism. Overall integration reinforces the understanding of the model upon the question and the document indicated by a noticeable decrease in F1. Because of the biomedical characteristic of PubMedQA, the out-of-vocabulary problem is much more severe. The ablation study results validated the importance of the multi-view pointer network.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>An ablation study for our model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Methods</td>
                <td>ROUGE<sup>a</sup>-1</td>
                <td>ROUGE-2</td>
                <td>ROUGE-L</td>
                <td>Accuracy (%)</td>
                <td>F1 (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Trans-Att</td>
                <td>36.01</td>
                <td>15.59</td>
                <td>30.22</td>
                <td>94.20</td>
                <td>77.57</td>
              </tr>
              <tr valign="top">
                <td>Intersentence attention</td>
                <td>34.65</td>
                <td>13.92</td>
                <td>28.07</td>
                <td>93.87</td>
                <td>73.13</td>
              </tr>
              <tr valign="top">
                <td>Coattention</td>
                <td>34.05</td>
                <td>13.61</td>
                <td>26.50</td>
                <td>93.40</td>
                <td>70.62</td>
              </tr>
              <tr valign="top">
                <td>Overall integration</td>
                <td>34.28</td>
                <td>14.26</td>
                <td>28.63</td>
                <td>94.53</td>
                <td>72.37</td>
              </tr>
              <tr valign="top">
                <td>Multi-view pointer network</td>
                <td>35.16</td>
                <td>13.98</td>
                <td>29.32</td>
                <td>94.39</td>
                <td>75.67</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>ROUGE: recall-oriented understudy for gisting evaluation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Case Study</title>
        <p>In <xref rid="figure3" ref-type="fig">Figure 3</xref>, we show the summaries generated by the proposed method and some baseline methods for comparison, and visualize the sources of the summaries with colors. The context underlined and highlighted with green was used by Trans-Att to generate the summary, which contains more information than in the reference summary. By comparison, we observed that Trans-Att not only successfully exploits the intersentence information with useful information but also uses the question information in understanding semantic content; pointer-generator network generates an irrelevant summary, which proves the importance of the question information; SD<sub>2</sub> fails to capture the core argument, resulting in repeating the question and paying attention to wrong information; the final answer demonstrates the validity in evaluating factual consistency by QA task (although SD<sub>2</sub> gives the right final answer, there is still a semantic mismatch because the first sentence is essentially the same as the question); and the bottom example demonstrates that there are limitations to the yes/no questions, the answer of which depends partly on clues of negative pronouns. Future work will consider increasing the diversity of the QA task.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Case study from PubMedQA (the bottom example omits the context; final answer is in parentheses). MSG: multi-hop selective generator; PGN: pointer-generator network; QS: query-based summarization using neural networks; SD<sub>2</sub>: soft long short-term memory–based diversity attention model; HELLP: hemolysis, elevated liver enzymes, and low platelets counts syndrome.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e38052_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Novel N-Grams</title>
        <p>We also analyzed the output of abstractive models by calculating the proportion of novel n-grams that appear in the summaries but not in the source texts. <xref ref-type="table" rid="table5">Table 5</xref> shows that summaries of our model account for a lower rate of novel n-grams than the reference summaries, indicating the quality of abstraction. We observed that the traditional abstractive approach (pointer-generator network) copies more phrases, perhaps because it generates more words from the context without being question driven, which increases the probability of unrelated information being selected. Note that MSG produces novel n-grams more frequently. However, it may contain the factual inconsistency problem in generating new words.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Proportion of novel n-grams.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Methods</td>
                <td>1 grams (%)</td>
                <td>2 grams (%)</td>
                <td>3 grams (%)</td>
                <td>4 grams (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Trans-Att</td>
                <td>11.00</td>
                <td>47.82</td>
                <td>67.12</td>
                <td>79.38</td>
              </tr>
              <tr valign="top">
                <td>MSG<sup>a</sup></td>
                <td>13.43</td>
                <td>54.66</td>
                <td>74.13</td>
                <td>85.01</td>
              </tr>
              <tr valign="top">
                <td>PGN<sup>b</sup></td>
                <td>16.29</td>
                <td>43.73</td>
                <td>58.38</td>
                <td>69.14</td>
              </tr>
              <tr valign="top">
                <td>Reference</td>
                <td>27.83</td>
                <td>72.11</td>
                <td>87.17</td>
                <td>93.55</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>MSG: multi-hop selective generator.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>PGN: pointer-generator network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Conclusions</title>
        <p>In this paper, a novel transformer-based question-driven abstractive summarization model was proposed to generate concise and consistent summaries for nonfactoid QA. A two-step attention mechanism was proposed to exploit the mutual information both of the question to context and the sentence over other sentences. We used the overall integration mechanism and the novel pointer network to better integrate and use information of the question, document, and summary. We conducted a QA task to evaluate the factual consistency between the generated summary and the reference summary. Experimental results demonstrate that our proposed model achieves comparable performance to the state-of-the-art methods.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>Due to the insufficiency of the data set quantity, we were limited to conducting experiments on PubMedQA. We are looking forward to conducting more persuasive experiments when the insufficiency is lifted. As for the evaluation of the factual consistency, we can also incorporate human expertise to further enhance the credibility of the proposed QA metric. Hopefully, our method can provide some inspiration in the summarization task.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representation from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">MSG</term>
          <def>
            <p>multi-hop selective generator</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">QA</term>
          <def>
            <p>question answering</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">QS</term>
          <def>
            <p>query-based summarization using neural networks</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ROUGE</term>
          <def>
            <p>recall-oriented understudy for gisting evaluation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SD<sub>2</sub></term>
          <def>
            <p>soft long short-term memory–based diversity attention model</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The publication of this paper is funded by grants from the Natural Science Foundation of China (62006034) and Natural Science Foundation of Liaoning Province (2021-BS-067).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>XW and BZ completed the experiments and wrote the paper. JW and BX provided theoretical guidance and revision of the paper. HL, ZY, and BX contributed to the algorithm design.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gambhir</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Recent automatic text summarization techniques: a survey</article-title>
          <source>Artif Intelligence Rev</source>
          <year>2016</year>
          <month>3</month>
          <day>29</day>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1007/s10462-016-9475-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bao</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>What have we achieved on text summarization?</article-title>
          <year>2020</year>
          <month>11</month>
          <conf-name>2020 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>November 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <fpage>446</fpage>
          <lpage>469</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.33</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savery</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Abacha</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Gayen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Question-driven summarization of answers to consumer health questions</article-title>
          <source>Sci Data</source>
          <year>2020</year>
          <month>10</month>
          <day>02</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>322</fpage>
          <pub-id pub-id-type="doi">10.1038/s41597-020-00667-z</pub-id>
          <pub-id pub-id-type="medline">33009402</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41597-020-00667-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC7532186</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajpurkar</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lopyrev</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>SQuAD: 100,000+ questions for machine comprehension of text</article-title>
          <year>2016</year>
          <month>11</month>
          <conf-name>2016 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>November 2016</conf-date>
          <conf-loc>Austin, TX</conf-loc>
          <fpage>2383</fpage>
          <lpage>2392</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d16-1264</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Rijke</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Summarizing answers in non-factoid community question-answering</article-title>
          <source>Proceedings of the Tenth ACM International Conference on Web Search and Data Mining</source>
          <year>2017</year>
          <month>2</month>
          <day>2</day>
          <conf-name>WSDM '17</conf-name>
          <conf-date>February 6-10, 2017</conf-date>
          <conf-loc>Cambridge, United Kingdom</conf-loc>
          <fpage>405</fpage>
          <lpage>414</lpage>
          <pub-id pub-id-type="doi">10.1145/3018661.3018704</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Dhingra</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>PubMedQA: a dataset for biomedical research question answering</article-title>
          <year>2019</year>
          <month>11</month>
          <conf-name>2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing</conf-name>
          <conf-date>November 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <fpage>2567</fpage>
          <lpage>2577</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1259</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Bridging hierarchical and sequential context modeling for question-driven extractive answer summarization</article-title>
          <source>Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
          <year>2020</year>
          <month>7</month>
          <day>25</day>
          <conf-name>SIGIR '20</conf-name>
          <conf-date>July 25-30, 2020</conf-date>
          <conf-loc>Virtual event, China</conf-loc>
          <fpage>1693</fpage>
          <lpage>1696</lpage>
          <pub-id pub-id-type="doi">10.1145/3397271.3401208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Multi-hop inference for question-driven summarization</article-title>
          <year>2020</year>
          <month>11</month>
          <conf-name>2020 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>November 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <fpage>6734</fpage>
          <lpage>6744</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.547</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>AttSum: joint learning of focusing and summarization with neural attention</article-title>
          <source>Proceedings of COLING 2016</source>
          <year>2016</year>
          <month>12</month>
          <conf-name>26th International Conference on Computational Linguistics: Technical Papers</conf-name>
          <conf-date>December 2016</conf-date>
          <conf-loc>Osaka, Japan</conf-loc>
          <fpage>546</fpage>
          <lpage>556</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/C16-1053"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nema</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Khapra</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Laha</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ravindran</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Diversity driven attention model for query-based abstractive summarization</article-title>
          <year>2017</year>
          <month>7</month>
          <conf-name>55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>July 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <fpage>1063</fpage>
          <lpage>1072</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/p17-1098</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hasselqvist</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Helmertz</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kågebäck</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Query-based abstractive summarization using neural networks</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on December 17, 2017.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1712.06100"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cardie</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Castelli</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Query-focused opinion summarization for user-generated content</article-title>
          <source>Proceedings of COLING 2014</source>
          <year>2014</year>
          <month>8</month>
          <conf-name>25th International Conference on Computational Linguistics: Technical Papers</conf-name>
          <conf-date>August 2014</conf-date>
          <conf-loc>Dublin, Ireland</conf-loc>
          <fpage>1660</fpage>
          <lpage>1669</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/C14-1157"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yulianti</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Scholer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Croft</surname>
              <given-names>WB</given-names>
            </name>
            <name name-style="western">
              <surname>Sanderson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Document summarization for answering non-factoid queries</article-title>
          <source>IEEE Trans Knowledge Data Eng</source>
          <year>2018</year>
          <month>1</month>
          <day>1</day>
          <volume>30</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2017.2754373</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <collab>Shen</collab>
          </person-group>
          <article-title>Joint learning of answer selection and answer summary generation in community question answering</article-title>
          <year>2020</year>
          <month>04</month>
          <day>03</day>
          <conf-name>The Thirty-Fourth AAAI Conference on Artificial Intelligence</conf-name>
          <conf-date>February 7-12, 2020</conf-date>
          <conf-loc>New York, NY</conf-loc>
          <fpage>7651</fpage>
          <lpage>7658</lpage>
          <pub-id pub-id-type="doi">10.1609/aaai.v34i05.6266</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>Proceedings of the 31st International Conference on Neural Information Processing Systems</source>
          <year>2017</year>
          <month>12</month>
          <day>4</day>
          <conf-name>NIPS'17</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA</conf-loc>
          <fpage>6000</fpage>
          <lpage>6010</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3295222.3295349"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>The factual inconsistency problem in abstractive text summarization: a survey</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 10, 2021.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2104.14839"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2104.14839</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kryscinski</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>McCann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the factual consistency of abstractive text summarization</article-title>
          <year>2020</year>
          <month>11</month>
          <conf-name>2020 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>November 16-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <fpage>9332</fpage>
          <lpage>9346</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.750</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CY</given-names>
            </name>
          </person-group>
          <article-title>ROUGE: a package for automatic evaluation of summaries</article-title>
          <year>2004</year>
          <month>7</month>
          <conf-name>Text Summarization Branches Out</conf-name>
          <conf-date>July 25-26, 2004</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>74</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/W04-1013/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Asking and answering questions to evaluate the factual consistency of summaries</article-title>
          <year>2020</year>
          <month>7</month>
          <conf-name>58th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 5-10, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <fpage>5008</fpage>
          <lpage>5020</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.450</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Durmus</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Diab</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>FEQA: a question answering evaluation framework for faithfulness assessment in abstractive summarization</article-title>
          <year>2020</year>
          <month>7</month>
          <conf-name>58th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 5-10, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <fpage>5055</fpage>
          <lpage>5070</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.454</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>See</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Get to the point: summarization with pointer-generator networks</article-title>
          <year>2017</year>
          <month>7</month>
          <conf-name>55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>July 30-August 4, 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <fpage>1073</fpage>
          <lpage>1083</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/p17-1099</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ouyang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Applying regression models to query-focused multi-document summarization</article-title>
          <source>Inf Processing Manage</source>
          <year>2011</year>
          <month>3</month>
          <volume>47</volume>
          <issue>2</issue>
          <fpage>227</fpage>
          <lpage>237</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2010.03.005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>1240</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <year>2019</year>
          <month>6</month>
          <conf-name>2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lapata</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Hierarchical transformers for multi-document summarization</article-title>
          <year>2019</year>
          <month>7</month>
          <conf-name>57th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 28-August 2, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <fpage>5070</fpage>
          <lpage>5081</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/p19-1500</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hendrycks</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gimpel</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Gaussian error linear units (GELUs)</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on July 8, 2020.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1606.08415"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bahdanau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Neural machine translation by jointly learning to align and translate</article-title>
          <year>2015</year>
          <month>05</month>
          <conf-name>3rd International Conference on Learning Representations</conf-name>
          <conf-date>May 7-9, 2015</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <pub-id pub-id-type="doi">10.48550/arXiv.1409.0473</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van Merriënboer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gulcehre</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bahdanau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bougares</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schwenk</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Learning phrase representations using RNN encoder-decoder for statistical machine translation</article-title>
          <year>2014</year>
          <month>10</month>
          <conf-name>2014 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>October 26-28, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <fpage>1724</fpage>
          <lpage>1734</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Batra</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bordes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fisch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Parikh</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>ParlAI: a dialog research software platform</article-title>
          <year>2017</year>
          <month>09</month>
          <conf-name>2017 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</conf-name>
          <conf-date>September 7-11, 2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <fpage>79</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/D17-2014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sennrich</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Haddow</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Birch</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Neural machine translation of rare words with subword units</article-title>
          <year>2016</year>
          <month>8</month>
          <conf-name>54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>August 7-12, 2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <fpage>1715</fpage>
          <lpage>1725</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/p16-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingma</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Ba</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Adam: a method for stochastic optimization</article-title>
          <year>2015</year>
          <month>05</month>
          <conf-name>3rd International Conference on Learning Representations</conf-name>
          <conf-date>May 7-9, 2015</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
