<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i11e38095</article-id>
      <article-id pub-id-type="pmid">36399375</article-id>
      <article-id pub-id-type="doi">10.2196/38095</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Medical Text Simplification Using Reinforcement Learning (TESLEA): Deep Learning–Based Text Simplification Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhang</surname>
            <given-names>Tianlin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kim</surname>
            <given-names>Seongsoon</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Suominen</surname>
            <given-names>Hanna</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Phatak</surname>
            <given-names>Atharva</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>Lakehead University</institution>
            <addr-line>955 Oliver Road</addr-line>
            <addr-line>Thunder Bay, ON, P7B 5E1</addr-line>
            <country>Canada</country>
            <phone>1 8073558351</phone>
            <email>phataka@lakeheadu.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3471-4784</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Savage</surname>
            <given-names>David W</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2837-3127</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ohle</surname>
            <given-names>Robert</given-names>
          </name>
          <degrees>MSc, MA, MBBCh</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8263-0556</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>Jonathan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0743-5720</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Mago</surname>
            <given-names>Vijay</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9741-3463</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>Lakehead University</institution>
        <addr-line>Thunder Bay, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>NOSM University</institution>
        <addr-line>Thunder Bay, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>NOSM University</institution>
        <addr-line>Sudbury, ON</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Atharva Phatak <email>phataka@lakeheadu.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>11</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>11</issue>
      <elocation-id>e38095</elocation-id>
      <history>
        <date date-type="received">
          <day>18</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>8</day>
          <month>8</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>12</day>
          <month>10</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Atharva Phatak, David W Savage, Robert Ohle, Jonathan Smith, Vijay Mago. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 18.11.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/11/e38095" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In most cases, the abstracts of articles in the medical domain are publicly available. Although these are accessible by everyone, they are hard to comprehend for a wider audience due to the complex medical vocabulary. Thus, simplifying these complex abstracts is essential to make medical research accessible to the general public.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop a deep learning–based text simplification (TS) approach that converts complex medical text into a simpler version while maintaining the quality of the generated text.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A TS approach using reinforcement learning and transformer–based language models was developed. Relevance reward, Flesch-Kincaid reward, and lexical simplicity reward were optimized to help simplify jargon-dense complex medical paragraphs to their simpler versions while retaining the quality of the text. The model was trained using 3568 complex-simple medical paragraphs and evaluated on 480 paragraphs via the help of automated metrics and human annotation.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The proposed method outperformed previous baselines on Flesch-Kincaid scores (11.84) and achieved comparable performance with other baselines when measured using ROUGE-1 (0.39), ROUGE-2 (0.11), and SARI scores (0.40). Manual evaluation showed that percentage agreement between human annotators was more than 70% when factors such as fluency, coherence, and adequacy were considered.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>A unique medical TS approach is successfully developed that leverages reinforcement learning and accurately simplifies complex medical paragraphs, thereby increasing their readability. The proposed TS approach can be applied to automatically generate simplified text for complex medical text data, which would enhance the accessibility of biomedical research to a wider audience.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>medical text simplification</kwd>
        <kwd>reinforcement learning</kwd>
        <kwd>natural language processing</kwd>
        <kwd>manual evaluation</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Research from the field of biomedicine contains essential information about new clinical trials on topics related to new drugs and treatments for a variety of diseases. Although this information is publicly available, it often has complex medical terminology, making it difficult for the general public to understand. One way to address this problem is by converting the complex medical text into a simpler language that can be understood by a wider audience. Although manual text simplification (TS) is one way to address the problem, it cannot be scaled to the rapidly expanding body of biomedical literature. Therefore, there is a need for the development of <italic>natural language processing</italic> approaches that can automatically perform TS.</p>
      </sec>
      <sec>
        <title>Related Studies</title>
        <sec>
          <title>TS Approaches</title>
          <p>Initial research in the field of TS focused on <italic>lexical simplification</italic> (LS) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. An LS system typically involves replacing complex words with their simpler alternatives using lexical databases, such as the <italic>Paraphrase Database</italic> [<xref ref-type="bibr" rid="ref3">3</xref>], WordNet [<xref ref-type="bibr" rid="ref4">4</xref>], or using language models, such as <italic>bidirectional encoder representations from transformer</italic>s (BERT) [<xref ref-type="bibr" rid="ref5">5</xref>]. Recent research defines TS as a <italic>sequence-to-sequence</italic> (seq2seq) task and has approached it by leveraging model architectures from other seq2seq tasks such as machine translation and text summarization [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. Nisioi et al [<xref ref-type="bibr" rid="ref9">9</xref>] proposed a neural <italic>seq2seq</italic> model, which used <italic>long short-term memories</italic> (LSTMs) for automatic TS. It was trained on simple-complex sentence pairs and showed through human evaluations that the TS system–generated outputs ultimately preserved meaning and were grammatically correct [<xref ref-type="bibr" rid="ref9">9</xref>]. Afzal et al [<xref ref-type="bibr" rid="ref10">10</xref>] incorporated LSTMs to create a quality-aware text summarization system for medical data. Zhang and Lapata [<xref ref-type="bibr" rid="ref11">11</xref>] developed an LSTM-based neural encoder-decoder TS model and trained it using <italic>reinforcement learning</italic> (RL) to directly optimize SARI [<xref ref-type="bibr" rid="ref12">12</xref>] scores along with a few other rewards. SARI is a widely used metric for automatic evaluation of TS.</p>
          <p>With the recent progress in natural language processing research, LSTM-based models were outperformed by transformer [<xref ref-type="bibr" rid="ref13">13</xref>]-based language models [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. Transformers follow an encoder-decoder structure with both the encoder and decoder made up of <italic>L</italic> identical layers. Each layer consists of 2 sublayers, one being a feed-forward layer and the other a multihead attention layer. Transformer-based language models, such as BART [<xref ref-type="bibr" rid="ref14">14</xref>], generative pretraining transformer (GPT) [<xref ref-type="bibr" rid="ref15">15</xref>], and <italic>text-to-text-transfer-transformer</italic> [<xref ref-type="bibr" rid="ref16">16</xref>], have achieved strong performance on natural language generation tasks such as text summarization and machine translation.</p>
          <p>Building on the success of transformer-based language models, recently Martin et al [<xref ref-type="bibr" rid="ref17">17</xref>] introduced <italic>multilingual unsupervised sentence simplification</italic> (MUSS) [<xref ref-type="bibr" rid="ref17">17</xref>], a BART [<xref ref-type="bibr" rid="ref14">14</xref>]-based language model, which achieved state-of-the-art performance on TS benchmarks by training on paraphrases mined from CCNet [<xref ref-type="bibr" rid="ref18">18</xref>] corpus. Zhao et al [<xref ref-type="bibr" rid="ref19">19</xref>] proposed a semisupervised approach that incorporated the back-translation architecture along with denoising autoencoders for the purpose of automatic TS. Unsupervised TS is also an active area of research but has been primarily limited to LS. However, in a recent study, Surya et al [<xref ref-type="bibr" rid="ref20">20</xref>] proposed an unsupervised approach to perform TS at both the lexical and syntactic levels. In general, research in the field of TS has been focused mostly on sentence-level simplification. However, Sun et al [<xref ref-type="bibr" rid="ref21">21</xref>] proposed a document-level data set (D-wikipedia) and baseline models to perform document-level simplification. Similarly, Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>] proposed a BART [<xref ref-type="bibr" rid="ref14">14</xref>]-based model that was trained using unlikelihood loss for the purpose of paragraph-level medical TS. Although their training regime penalizes the terms considered “jargon” and increases the readability, the generated text has lower quality and diversity [<xref ref-type="bibr" rid="ref8">8</xref>]. Thus, the lack of document- or paragraph-level simplification makes this an important work in the advancement of the field.</p>
        </sec>
        <sec>
          <title>TS Data Sets</title>
          <p>The majority of TS research uses data extracted from Wikipedia and news articles [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. These data sets are paired sentence-level data sets (ie, for each complex sentence, there is a corresponding simple sentence). TS systems have heavily relied on sentence-level data sets, extracted from regular and simple English Wikipedia, such as WikiLarge [<xref ref-type="bibr" rid="ref11">11</xref>], because they are publicly available. It was later shown by Xu [<xref ref-type="bibr" rid="ref24">24</xref>] that there are issues with data quality for the data sets extracted from Wikipedia. They proposed the Newsela corpus, which was created by educators who rewrote news articles for different school-grade levels. Automatic sentence alignment methods [<xref ref-type="bibr" rid="ref25">25</xref>] were used on the Newsela corpus to create a sentence-level TS data set. Despite the advancements in research on sentence-level simplification, there is a need for TS systems that can simplify text at a paragraph level.</p>
          <p>Recent work has focused on the construction of document-level simplification data sets [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Sun et al [<xref ref-type="bibr" rid="ref21">21</xref>] constructed a document-level data set, called D-Wikipedia, by aligning the English Wikipedia and Simple English Wikipedia spanning 143,546 article pairs. Although there are many data sets available for sentence-level TS, data sets for domain-specific paragraph-level TS are lacking. In the field of medical TS, Van den Bercken et al [<xref ref-type="bibr" rid="ref27">27</xref>] constructed a sentence-level simplification data set using sentence alignment methods. Recently, Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>] proposed the first paragraph-level medical simplification data set, containing 4459 simple-complex pairs of text, and this is the data set used for the analysis and baseline training in this study. A snippet of a complex paragraph and its simplified version from the data set proposed by Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>] is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The data set is open sourced and publicly available [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Complex medical paragraph and the corresponding simple medical paragraph from the dataset.</p>
            </caption>
            <graphic xlink:href="medinform_v10i11e38095_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>TS Evaluation</title>
          <p>The evaluation of TS usually falls into 2 categories: automatic evaluations and manual (ie, human) evaluations. Because of the subjective nature of TS, it has been suggested that the best approach is to perform manual evaluations, based on criteria such as fluency, meaning preservation, and simplicity [<xref ref-type="bibr" rid="ref20">20</xref>]. Automatic evaluation metrics most commonly used include readability indices such as Flesch-Kincaid Reading Ease [<xref ref-type="bibr" rid="ref29">29</xref>], <italic>Flesch-Kincaid Grade Level</italic> (FKGL) [<xref ref-type="bibr" rid="ref29">29</xref>], <italic>Automated Readability Index</italic> (ARI), Coleman-Liau index, and metrics for natural language generation tasks such as SARI [<xref ref-type="bibr" rid="ref12">12</xref>] and BLEU [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
          <p>Readability indices are used to assign a grade level to text signifying its simplicity. All the readability indices are calculated using some combination of word weighting, syllable, letter, or word counts, and are shown to measure some level of simplicity. Automatic evaluation metrics, such as BLEU [<xref ref-type="bibr" rid="ref30">30</xref>] and SARI [<xref ref-type="bibr" rid="ref12">12</xref>], are widely used in TS research, with SARI [<xref ref-type="bibr" rid="ref12">12</xref>] having specifically been developed for TS tasks. SARI is computed by comparing the generated simplifications with both the source and target references. It computes an average of <italic>F</italic><sub>1</sub>-score for 3 <italic>n-gram</italic> overlap operations: additions, keeps, and deletions. Both BLEU [<xref ref-type="bibr" rid="ref30">30</xref>] and SARI [<xref ref-type="bibr" rid="ref12">12</xref>] are n-gram–based metrics, which may fail to capture the semantics of the generated text.</p>
        </sec>
      </sec>
      <sec>
        <title>Objective</title>
        <p>The aim of this study is to develop an automatic TS approach that is capable of simplifying medical text data at a paragraph level, with the goal of providing greater accessibility of biomedical research. This paper uses RL-based training to directly optimize 2 properties of simplified text: relevance and simplicity. <italic>Relevance</italic> is defined as simplified text that retains salient and semantic information from the original article. <italic>Simplicity</italic> is defined as simplified text that is easy to understand and lexically simple. These 2 properties are optimized using TS-specific rewards, resulting in a system that outperforms previous baselines on Flesch-Kincaid scores. Extensive human evaluations are conducted with the help of domain experts to judge the quality of the generated text.</p>
        <p>The remainder of the paper is organized as follows: The “Methods” section provides details on the data set, the training procedure, and the proposed model, and describes how automatic and human evaluations were conducted to analyze the outputs generated by the proposed model (TESLEA). The “Results” section provides a brief description of the baseline models and the results obtained by conducting automatic and manual evaluation of the generated text. Finally, under the “Discussion” section, we highlight the limitations and future work, and draw conclusions.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Model Objective</title>
        <p>Given a complex medical paragraph, the goal of this work is to generate a simplified paragraph that is concise and captures the salient information expressed in the complex text. To accomplish this, an RL-based simplification model is proposed, which optimizes multiple rewards during training, and is tuned using a paragraph-level medical TS data set.</p>
      </sec>
      <sec>
        <title>Data Set</title>
        <p>The Cochrane Database of Systematic Reviews is a health care database with information on a wide range of clinical topics. Each review includes a plain language summary (PLS) written by the authors who follow guidelines to structure the summaries. PLSs are supposed to be clear, understandable, and accessible, especially for a general audience not familiar with the field of medicine. PLSs are highly heterogeneous in nature, and are not paired (ie, for every complex sentence there may not be a corresponding simpler version). However, Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>] used the Cochrane Database of Systematic Reviews data to produce a paired data set, which has 4459 pairs of complex-simple text, with each text containing less than 1024 tokens so that it can be fed into the BART [<xref ref-type="bibr" rid="ref14">14</xref>] model for the purpose of TS. The pioneering data set developed by Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>] is used in this study for training the models and is publicly available [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
      <sec>
        <title>TESLEA: TS Using RL</title>
        <sec>
          <title>Model and Rewards</title>
          <p>The TS solution proposed for the task of simplifying complex medical text uses an RL-based simplification model, which optimizes multiple rewards (<italic>relevance reward</italic>, <italic>Flesch-Kincaid Grade rewards, and lexical simplicity rewards</italic>) to achieve a more complete and concise simplification<italic>.</italic> The following subsections introduce the computation of these rewards, along with the training procedure.</p>
        </sec>
        <sec>
          <title>Relevance Reward</title>
          <p>Relevance reward measures how well the semantics of the target text is captured in its simplified version. This is calculated by computing the cosine similarity between the target text embedding (<italic>E<sub>T</sub></italic>) and the generated text embedding (<italic>E<sub>G</sub></italic>). BioSentVec [<xref ref-type="bibr" rid="ref31">31</xref>], a text embedding model trained on medical documents, is used to generate the text embeddings. The steps to calculate the relevance score are depicted in Algorithm 1.</p>
          <p>
            <graphic xlink:href="medinform_v10i11e38095_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </p>
          <p>The <italic>RelevanceReward</italic> function takes 3 arguments as input, namely, target text (<italic>T</italic>), generated text (<italic>G</italic>), and the embedding model (<italic>M</italic>). The function <italic>ComputeEmbedding</italic> takes the input text and embedding model (<italic>M</italic>) as input and generates the relevant text embedding. Finally, cosine similarity between generated text embedding (<italic>E<sub>G</sub></italic>) and target text embedding (<italic>E<sub>T</sub></italic>) is calculated to get the reward (Algorithm 1, line 4).</p>
        </sec>
        <sec>
          <title>Flesch-Kincaid Grade Reward</title>
          <p>FKGL refers to the grade level that must be attained to comprehend the presented information. A higher FKGL score indicates that the text is more complex, and a lower score indicates that the text is simpler. The FKGL for a text (S) is calculated using equation 1 [<xref ref-type="bibr" rid="ref29">29</xref>]:</p>
          <disp-formula>FKGL(S) = 0.39 × (total words/total sentences) + 11.8 × (total syllables/total words) – 15.59 <bold>(1)</bold></disp-formula>
          <p>The FKGL reward (<italic>R<sub>Flesch</sub></italic>) is designed to reduce the complexity of generated text and is calculated as presented in Algorithm 2.</p>
          <p>
            <graphic xlink:href="medinform_v10i11e38095_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </p>
          <p>In Algorithm 2, the function <italic>FleschKincaidReward</italic> takes 2 arguments as inputs, namely, generated text (<italic>G</italic>) and target text (<italic>T</italic>). The <italic>FKGLScore</italic> function calculates the FKGL for the given text. Once the FKGL for <italic>T</italic> and <italic>G</italic> is calculated, the Flesch-Kincaid reward (<italic>R<sub>Flesch</sub></italic>) is calculated as the relative difference between <italic>r</italic>(<italic>T</italic>) and <italic>r</italic>(<italic>G</italic>) (Algorithm 2, line 4), where <italic>r</italic>(<italic>T</italic>) and <italic>r</italic>(<italic>G</italic>) denote the FKGL of the target and generated text.</p>
        </sec>
        <sec>
          <title>Lexical Simplicity Reward</title>
          <p>Lexical simplicity is used to measure whether the words in the generated text (<italic>G</italic>) are simpler than the words in the source text (<italic>S</italic>). Laban et al [<xref ref-type="bibr" rid="ref26">26</xref>] proposed a lexical simplicity reward that uses the correlation between word difficulty and word frequency [<xref ref-type="bibr" rid="ref32">32</xref>]. As word frequency follows <italic>zipf law</italic>, Laban et al [<xref ref-type="bibr" rid="ref26">26</xref>] used it to design the reward function, which involves calculating <italic>zipf</italic> frequency of newly inserted words, that is, <italic>Z</italic>(<italic>G</italic> – <italic>S</italic>), and deleted words, that is, <italic>Z</italic>(<italic>S – G</italic>). The lexical simplicity reward is defined in the same way as proposed by Laban et al [<xref ref-type="bibr" rid="ref26">26</xref>] and is described in Algorithm 3. The analysis of the data set proposed by Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>] revealed that 87% of simple and complex pairs have a value of Δ<italic>Z</italic>(<italic>S</italic>, <italic>G</italic>) ≈ 0.4, where Δ<italic>Z</italic>(<italic>S</italic>, <italic>G</italic>) = <italic>Z</italic>(<italic>G</italic> – <italic>S</italic>) – <italic>Z</italic>(<italic>S</italic> – <italic>G</italic>) is the difference between the <italic>zipf</italic> frequency of inserted words and deleted words, with the value of lexical reward (<italic>R<sub>lexical</sub></italic>) scaled between 0 and 1.</p>
          <p>In Algorithm 3, <italic>LexicalSimplicityReward</italic> requires the source text (<italic>S</italic>) and the generated text (<italic>G</italic>) as the inputs. Functions <italic>ZIPFInserted</italic> [<xref ref-type="bibr" rid="ref25">25</xref>] and <italic>ZIPFDeleted</italic> [<xref ref-type="bibr" rid="ref25">25</xref>] calculate the <italic>zipf</italic> frequency of newly inserted words and the deleted words. Finally, the lexical reward (<italic>R<sub>lexical</sub></italic>) is calculated and normalized, as described in line 5.</p>
          <p>
            <graphic xlink:href="medinform_v10i11e38095_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </p>
        </sec>
      </sec>
      <sec>
        <title>Training Procedure and Baseline Model</title>
        <sec>
          <title>Pretrained BART</title>
          <p>The baseline language model used in this study for performing simplification was BART [<xref ref-type="bibr" rid="ref14">14</xref>], which is a transformer based encoder-decoder model that was pretrained using a denoising objective function. The decoder part of the model is autoregressive in nature, making it more suitable for sentence-generation tasks. Furthermore, the BART model achieves strong performance on natural language generation tasks such as summarization, which was demonstrated on XSum [<xref ref-type="bibr" rid="ref33">33</xref>] and CNN/Daily Mail [<xref ref-type="bibr" rid="ref34">34</xref>] data sets. In this case, a version of BART fine-tuned on XSUM [<xref ref-type="bibr" rid="ref33">33</xref>] data set is being used.</p>
        </sec>
        <sec>
          <title>Language Model Fine-tuning</title>
          <p>Transformer-based language models are pretrained on a large corpus of text and later fine-tuned on a downstream task by minimizing the maximum likelihood loss (<italic>Lml</italic>) function [<xref ref-type="bibr" rid="ref3">3</xref>]. Consider a paired data set <italic>C</italic>, where each instance consists of a source sentence containing <italic>n</italic> tokens <italic>x</italic> = {<italic>x</italic><sub>1</sub>,…,<italic>x<sub>n</sub></italic>} and a target sequence containing <italic>m</italic> tokens <italic>y</italic> = {<italic>y</italic><sub>1</sub>,…,<italic>y<sub>m</sub></italic>}, then the <italic>Lml</italic> function is given in equation 2 with the computation described in Algorithm 4.</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i11e38095_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>θ</italic> represents the model parameters and <italic>y</italic><sub>&#60;</sub><italic><sub>t</sub></italic> denotes preceding tokens before the position <italic>t</italic> [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
          <p>
            <graphic xlink:href="medinform_v10i11e38095_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </p>
          <p>However, the results obtained by minimizing <italic>Lml</italic> are not always optimal. There are 2 main reasons for the degradation of results. The first is called “exposure bias” [<xref ref-type="bibr" rid="ref36">36</xref>], which occurs when the model expects gold-standard data at each step of training, but does not receive appropriate supervision during testing, resulting in an accumulation of errors during prediction. The second is called “representation collapse” [<xref ref-type="bibr" rid="ref37">37</xref>], which is a degradation of the pretrained language model representations during fine-tuning. Ranzato et al [<xref ref-type="bibr" rid="ref36">36</xref>] avoided the problem of exposure bias by directly optimizing the specific discrete metric instead of minimizing the <italic>Lml</italic> with the help of an RL-based algorithm called REINFORCE [<xref ref-type="bibr" rid="ref38">38</xref>]. A variant of REINFORCE [<xref ref-type="bibr" rid="ref38">38</xref>] called Self-Critical Sequence Training [<xref ref-type="bibr" rid="ref39">39</xref>] was used in this study to directly optimize certain rewards specifically designed for TS; more information on this is provided in the following subsection.</p>
        </sec>
        <sec>
          <title>Self-critical Sequence Training</title>
          <p>TS can be formulated as an RL problem, where the “agent” (language model) interacts with the environment to take “action” (next word prediction) based on a learned “policy” (<italic>p<sub>θ</sub></italic>) defined by model parameters <italic>θ</italic> while observing some rewards (<italic>R</italic>). In this work, BART [<xref ref-type="bibr" rid="ref14">14</xref>] was used as the language model, and the REINFORCE [<xref ref-type="bibr" rid="ref38">38</xref>] algorithm was used to learn an optimal policy that maximizes rewards. Specifically, REINFORCE was used with a baseline to stabilize the training procedure using an objective function (<italic>Lpg</italic>) with a baseline reward <italic>b</italic> (equation 3):</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i11e38095_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>p<sub>θ</sub></italic>(<italic>y<sub>i</sub><sup>s</sup>&#124;</italic>...) denotes the probability of the <italic>i</italic>th word conditioned on a previously generated sampled sequence by the model; <italic>r</italic>(<italic>y<sup>s</sup></italic>) denotes the reward computed for a sentence generated using sampling; <italic>x</italic> denotes the source sentence, and <italic>n</italic> is the length of the generated sentence. Rewards are computed as a weighted sum of the relevance reward (<italic>R<sub>cosine</sub></italic>), <italic>R<sub>Flesch</sub></italic>, and lexical simplicity reward (<italic>R<sub>lexical</sub></italic>; <xref rid="figure2" ref-type="fig">Figure 2</xref>) and are given by:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i11e38095_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>α</italic>, <italic>β</italic>, and <italic>d</italic> are the weights associated with the rewards, respectively.</p>
          <p>To approximate the baseline reward, Self-Critical Sequence Training [<xref ref-type="bibr" rid="ref39">39</xref>] was used. The baseline was calculated by computing reward values for a sentence that has been generated using greedy decoding <italic>r</italic>(<italic>y</italic>*) by the current model and its computation is described in Algorithm 5. The loss function is defined in equation 5:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i11e38095_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>y</italic>* denotes the sentence generated using greedy decoding. More details on greedy decoding are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> (see also [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]).</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>The Compute Rewards function calculates a weighted sum of three rewards: FKGL Reward, Lexical Simplicity Reward, and Relevance Reward.</p>
            </caption>
            <graphic xlink:href="medinform_v10i11e38095_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>
            <graphic xlink:href="medinform_v10i11e38095_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </p>
          <p>Intuitively, by minimizing the loss described in equation 5, the likelihood of choosing the sampled sequence (<italic>y<sup>s</sup></italic>) is promoted if the reward obtained for the sampled sequence, <italic>r</italic>(<italic>y<sup>s</sup></italic>), is greater than the baseline reward <italic>r</italic>(<italic>y</italic>*). The samples that obtain a lower reward are subsequently suppressed. The model is trained using a combination of <italic>Lml</italic> and policy gradient loss similar to [<xref ref-type="bibr" rid="ref43">43</xref>]. The overall loss is given as follows:</p>
          <disp-formula><italic>L</italic> = <italic>γLpg</italic> + (1 – <italic>γ</italic>)<italic>Lml</italic> <bold> (6)</bold></disp-formula>
          <p>where <italic>γ</italic> is a scaling factor that can be tuned.</p>
        </sec>
      </sec>
      <sec>
        <title>Summary of the Training Process</title>
        <p>Overall, the training procedure follows a 2-step approach. As the pretrained BART [<xref ref-type="bibr" rid="ref14">14</xref>] was not trained on the medical domain–related text, it was first fine-tuned on the document-level paired data set [<xref ref-type="bibr" rid="ref8">8</xref>] by minimizing the <italic>Lml</italic> (maximum likelihood estimation [MLE]; equation 2). In the second part, the fine-tuned BART model was trained further using RL. The RL procedure of TESLEA involves 2 steps: (1) the RL step and (2) the MLE optimization step, which are both shown in <xref rid="figure3" ref-type="fig">Figure 3</xref> and further described in Algorithm 6. The given simple-complex text pairs are converted to tokens as required by the BART model. In the MLE step, these tokens are used to compute <italic>logits</italic> from the model, and then finally MLE loss is computed. In the RL step, the model generates simplified text using 2 decoding strategies: (1) greedy decoding and (2) multinomial sampling. Rewards are computed as weighted sums (<xref rid="figure3" ref-type="fig">Figure 3</xref>) for sentences generated using both the decoding strategies. These rewards are then used to calculate the loss for the RL step. Finally, a weighted sum of losses is computed that is used to estimate the gradients and update model parameters. All the hyperparameter settings used are included in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> (see also [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref47">47</xref>]).</p>
        <p>
          <graphic xlink:href="medinform_v10i11e38095_fig17.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Reinforcement learning–based training procedure for TESLEA. MLE: maximum likelihood estimation; RL: reinforcement learning.</p>
          </caption>
          <graphic xlink:href="medinform_v10i11e38095_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Automatic Metrics</title>
        <p>Two readability indices were used to perform automatic evaluations of the generated text, namely, FKGL and the Automatic Readability Index (ARI). The SARI score is a standard metric for TS. The F-1 versions of ROUGE-1 and ROUGE-2 [<xref ref-type="bibr" rid="ref44">44</xref>] scores were also reported. Readers can find more details about these metrics in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. To measure the quality of the generated text, the criteria proposed by Yuan et al [<xref ref-type="bibr" rid="ref45">45</xref>] were used, which are mentioned in the “Automatic Evaluation Metrics” section in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. The criteria proposed by Yuan et al [<xref ref-type="bibr" rid="ref45">45</xref>] can be automatically computed using a language model–based metric called “BARTScore.” Further details on how to use BARTScore to measure the quality of the generated text are also mentioned in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Human Evaluations</title>
        <p>In this study, 3 domain experts judged the quality of the generated text based on the factors mentioned in the previous section. The evaluators rated the text on a Likert scale from 1 to 5. First, simplified test data were generated using TESLEA, and then 51 generated paragraphs were randomly selected, creating 3 subsets containing 17 paragraphs each. Every evaluator was presented with 2 subsets, that is, a total of 34 complex-simple TESLEA-generated paragraphs. The evaluations were conducted via Google Forms, and the human annotators were asked to measure the quality of simplification for informativeness (INFO), fluency (FLU), coherence (COH), factuality (FAC), and adequacy (ADE) (<xref rid="figure4" ref-type="fig">Figure 4</xref>). All the data collected were stored in CSV files for statistical analysis.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>A sample question seen by the human annotator.</p>
          </caption>
          <graphic xlink:href="medinform_v10i11e38095_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>This section consists of 3 subsections, namely, (1) Baseline Models, (2) Automatic Evaluations, and (3) Human Evaluations. The first section highlights the baseline models used for comparison and analysis. The second section discusses the results obtained by performing automatic evaluations of the model. The third and final section discusses the results obtained from human assessments and analyzes the relationship between human annotations and automatic metrics.</p>
      </sec>
      <sec>
        <title>Baseline Models</title>
        <p>TESLEA is compared with other strong baseline models and their details are discussed below:</p>
        <list list-type="bullet">
          <list-item>
            <p>BART-Fine-tuned: BART-Fine-tuned is a BART-large model fine-tuned using an <italic>Lml</italic> on the data set proposed by Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>]. Studies have shown that large pretrained models often perform competitively when fine-tuned for downstream tasks, thus making this a strong competitor.</p>
          </list-item>
          <list-item>
            <p>BART-UL: Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>] also proposed BART-UL for paragraph-level medical TS. It is the first model to perform paragraph-level medical TS and has achieved strong results on automated metrics. BART-UL was trained using an unlikelihood objective function that penalizes the model for generating technical words (ie, complex words). Further details on the training procedure of BART-UL are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </list-item>
          <list-item>
            <p>MUSS: MUSS [<xref ref-type="bibr" rid="ref17">17</xref>] is a BART-based language model that was trained by mining paraphrases from the CCNet corpus [<xref ref-type="bibr" rid="ref18">18</xref>]. MUSS was trained on a data set consisting of 1 million paraphrases, helping it achieve a strong SARI score. Although MUSS is trained on a sentence-level data set, it still serves as a strong baseline for comparison. Further details on the training procedure for MUSS are discussed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </list-item>
          <list-item>
            <p>Keep it Simple (KIS): Laban et al [<xref ref-type="bibr" rid="ref26">26</xref>] proposed an unsupervised approach for paragraph-level TS. KIS is trained using RL and uses the GPT-2 model as a backbone. KIS has shown strong performance on SARI scores beating many supervised and unsupervised TS approaches. Additional details on the training procedure for KIS are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </list-item>
          <list-item>
            <p>PEGASUS models: PEGASUS is a transformer-based encoder-decoder model that has achieved state-of-the-art results on many text-summarization data sets. It was specifically designed for the task of text summarization. In our analysis, we used 2 variants of PEGASUS models, namely, (1) PEGASUS-large, the large variant of Pegasus model, and (2) PEGASUS-pubmed-large, the large variant of the PEGASUS model that was pretrained on a PubMed data set. Both the PEGASUS models were fine-tuned using <italic>Lml</italic> on the data set proposed by Devaraj et al [<xref ref-type="bibr" rid="ref8">8</xref>]. For more information regarding the PEGASUS model, the readers are suggested to refer to [<xref ref-type="bibr" rid="ref46">46</xref>].</p>
          </list-item>
        </list>
        <p>The models described above are the only ones available for medical TS as of June 2022.</p>
      </sec>
      <sec>
        <title>Results of Automatic Metrics</title>
        <p>The metrics used for automatic evaluation are FKGL, ARI, ROUGE-1, ROUGE-2, SARI, and BARTScore. The mean readability indices scores (ie, FKGL and ARI) obtained by various models are reported in <xref ref-type="table" rid="table1">Table 1</xref>. ROUGE-1, ROUGE-2, and SARI scores are reported in <xref ref-type="table" rid="table2">Table 2</xref> and BARTScore is reported in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Flesch-Kincaid Grade Level and Automatic Readability Index for the generated text.<sup>a</sup></p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="370"/>
            <col width="0"/>
            <col width="300"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Text</td>
                <td>Flesch-Kincaid Grade Level</td>
                <td>Automatic Readability Index</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Baseline</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td>Technical abstracts</td>
                <td colspan="2">14.42</td>
                <td>15.58</td>
              </tr>
              <tr valign="top">
                <td>Gold-standard references</td>
                <td colspan="2">13.11</td>
                <td>15.08</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Model generated</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="7">
                  <break/>
                </td>
                <td>BART-Fine-tuned</td>
                <td colspan="2">13.45</td>
                <td>15.32</td>
              </tr>
              <tr valign="top">
                <td>BART-UL</td>
                <td colspan="2">11.97</td>
                <td>13.73<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>TESLEA</td>
                <td colspan="2">11.84<sup>b</sup></td>
                <td>13.82</td>
              </tr>
              <tr valign="top">
                <td>MUSS<sup>c</sup></td>
                <td colspan="2">14.29</td>
                <td>17.29</td>
              </tr>
              <tr valign="top">
                <td>Keep it Simple</td>
                <td colspan="2">14.15</td>
                <td>17.05</td>
              </tr>
              <tr valign="top">
                <td>PEGASUS-large</td>
                <td colspan="2">14.53</td>
                <td>17.55</td>
              </tr>
              <tr valign="top">
                <td>PEGASUS-pubmed-large</td>
                <td colspan="2">16.35</td>
                <td>19.8</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>TESLEA significantly reduces FKGL and ARI scores when compared with plain language summaries.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Best score.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>MUSS: multilingual unsupervised sentence simplification.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>ROUGE-1, ROUGE-2, and SARI scores for the generated text.<sup>a</sup></p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="340"/>
            <col width="200"/>
            <col width="220"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>ROUGE-1</td>
                <td>ROUGE-2</td>
                <td>SARI</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>BART-Fine-tuned</td>
                <td>0.40</td>
                <td>0.11</td>
                <td>0.39</td>
              </tr>
              <tr valign="top">
                <td>BART-UL</td>
                <td>0.38</td>
                <td>0.14</td>
                <td>0.40<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>TESLEA</td>
                <td>0.39</td>
                <td>0.11</td>
                <td>0.40<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>MUSS<sup>c</sup></td>
                <td>0.23</td>
                <td>0.03</td>
                <td>0.34</td>
              </tr>
              <tr valign="top">
                <td>Keep it Simple</td>
                <td>0.23</td>
                <td>0.03</td>
                <td>0.32</td>
              </tr>
              <tr valign="top">
                <td>PEGASUS-large</td>
                <td>0.44<sup>b</sup></td>
                <td>0.18<sup>b</sup></td>
                <td>0.40<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>PEGASUS-pubmed-large</td>
                <td>0.42</td>
                <td>0.16</td>
                <td>0.40<sup>b</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>TESLEA achieves similar performance to other models. Higher scores of ROUGE-1, ROUGE-2, and SARI are desirable.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Best performance.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>MUSS: multilingual unsupervised sentence simplification.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Faithfulness Score and F-score for the generated text by the models.<sup>a</sup></p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="420"/>
            <col width="340"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Models</td>
                <td>Faithfulness Score</td>
                <td>F-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>BART-Fine-tuned</td>
                <td>0.137</td>
                <td>0.078</td>
              </tr>
              <tr valign="top">
                <td>BART-UL</td>
                <td>0.242</td>
                <td>0.061</td>
              </tr>
              <tr valign="top">
                <td>TESLEA</td>
                <td>0.366<sup>b</sup></td>
                <td>0.097<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>MUSS<sup>c</sup></td>
                <td>0.031</td>
                <td>0.029</td>
              </tr>
              <tr valign="top">
                <td>Keep it Simple</td>
                <td>0.030</td>
                <td>0.028</td>
              </tr>
              <tr valign="top">
                <td>PEGASUS-large</td>
                <td>0.197</td>
                <td>0.073</td>
              </tr>
              <tr valign="top">
                <td>PEGASUS-pubmed-large</td>
                <td>0.29</td>
                <td>0.063</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Higher scores of Faithfulness and F-score are desirable.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Highest score.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>MUSS: multilingual unsupervised sentence simplification.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <sec>
          <title>Readability Indices, ROUGE, and SARI Scores</title>
          <p>The readability indices scores reported in <xref ref-type="table" rid="table1">Table 1</xref> suggest that the FKGL scores obtained by TESLEA are better (ie, a lower score) when compared with the FKGL scores obtained by comparing technical abstracts (ie, complex medical paragraphs available in the data set) with the gold-standard references (ie, simple medical paragraphs corresponding to the complex medical paragraphs). Moreover, TESLEA achieves the lowest FKGL score (11.84) when compared with baseline models, indicating significant improvement in the TS. The results suggest that (1) BART-based transformer models are capable of performing simplification at the paragraph level such that the outputs are at a reduced reading level (FKGL) when compared with technical abstracts, gold-standard references, and baseline models. (2) The proposed method to optimize TS-specific rewards allows the generation of text with greater readability than even the gold-standard references, as indicated by the FKGL scores in <xref ref-type="table" rid="table1">Table 1</xref>. The reduction in FKGL scores can be explained by the fact that FKGL was a part of a reward (<italic>R<sub>Flesch</sub></italic>) that was directly being optimized.</p>
          <p>In addition, we report the SARI [<xref ref-type="bibr" rid="ref12">12</xref>] and ROUGE scores [<xref ref-type="bibr" rid="ref44">44</xref>] as shown in <xref ref-type="table" rid="table2">Table 2</xref>. SARI is a standard automatic metric used in sentence-level TS tasks. The ROUGE score is another standard metric in text summarization tasks. The results show that TESLEA matches the performance of baseline models on both ROUGE and SARI scores. Although there are no clear patterns when ROUGE and SARI scores are considered, there are differences in the quality of text generated by these models and these are explained in the “Text Quality Measure” subsection.</p>
        </sec>
        <sec>
          <title>Text Quality Measure</title>
          <p>There has been significant progress in designing automatic metrics that are able to capture linguistic quality of the text generated by language models. One such metric that is able to measure the quality of generated text is BARTScore [<xref ref-type="bibr" rid="ref45">45</xref>]. BARTScore has shown strong correlation with human assessments on various tasks ranging from machine translation to text summarization. BARTScore has 4 different metrics (ie, Faithfulness Score, Precision, Recall, F-score), which can be used to measure different qualities of generated text. Further details on how to use BARTScore are mentioned in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
          <p>According to the analysis conducted by Yuan et al [<xref ref-type="bibr" rid="ref45">45</xref>], Faithfulness Score measures 3 aspects of generated text via COH, FLU, and FAC. The F-score measures 2 aspects of generated text (INFO and ADE). In our analysis, we use these 2 variants of BARTScore to measure COH, FLU, FAC, INFO, and ADE. TESLEA achieves the highest values (<xref ref-type="table" rid="table3">Table 3</xref>) of Faithfulness Score (0.366) and F-score (0.097), indicating that the rewards designed for the purpose of TS not only help the model in generating simplified text but also on some level preserve the quality of generated text. The F-scores of all the models are relatively poor (ie, scores closer to 1 are desirable). One of the reasons for low F-scores could be the introduction of misinformation or hallucinations in the generated text, a common problem for language models, which could be addressed by adapting training strategies that focus on INFO via the help of rewards or objective functions.</p>
          <p>For qualitative analysis we randomly selected 50 sentences from the test data and calculated the average number of tokens based on BART model vocabulary. For the readability measure, we calculated the FKGL scores of these generated texts and noted any textual inconsistencies such as misinformation. The analysis revealed that the text generated by most models was significantly smaller than the gold-standard references (<xref ref-type="table" rid="table4">Table 4</xref>). Furthermore, TESLEA- and BART-UL–generated texts were significantly shorter compared with other baseline models and TESLEA had the lowest FKGL score among all the models as depicted in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
          <p>From a qualitative point of view, the sentences generated by most baseline models involve significant duplication of text from the original complex medical paragraph. The outputs generated by the KIS model were incomplete and appeared “noisy” in nature. One of the reasons for the noise generation could be unstable training due to the lack of a huge corpus of domain-specific data. BART-UL–generated paragraphs are simplified as indicated by the FKGL and ARI scores, but they are extractive in nature (ie, the model learns to select simplified sentences from the original medical paragraph and combines them to form a simplification). PEGASUS-pubmed-large–generated paragraphs are also extractive in nature and similar to BART-UL–generated paragraphs, but it was observed that they were grammatically inconsistent. In contrast to baseline models, the text generated by TESLEA was concise, semantically relevant, and simple, without involving any medical domain–related complex vocabulary. <xref rid="figure5" ref-type="fig">Figure 5</xref> shows an example of text generated by all the models, with blue text indicating the copied text.</p>
          <p>In addition to the duplicated text, the models also induced misinformation in the generated text. The most common form of induced misinformation observed was “The evidence is current up to [date],” as shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>. This text error occurred due to the structure of the data (ie, PLS contains statements related to this research, but these statements were not in the original text; thus, the model attempted to add these statements to the generated text although it is not factually correct). Thus considerable attention should be paid to including FAC measures in the training regime of these models. For a more complete assessment of the quality of simplification, human evaluation was conducted using domain experts for the text generated by TESLEA.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Average number of tokens and average Flesch-Kincaid Grade Level scores for selected samples.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="500"/>
              <col width="250"/>
              <col width="250"/>
              <thead>
                <tr valign="top">
                  <td>Model</td>
                  <td>Number of tokens</td>
                  <td>Flesch-Kincaid Grade Level</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Technical abstracts</td>
                  <td>498.11</td>
                  <td>14.37</td>
                </tr>
                <tr valign="top">
                  <td>Gold-standard references</td>
                  <td>269.74</td>
                  <td>12.77</td>
                </tr>
                <tr valign="top">
                  <td>TESLEA</td>
                  <td>131.37</td>
                  <td>12.34</td>
                </tr>
                <tr valign="top">
                  <td>BART-UL</td>
                  <td>145.08</td>
                  <td>12.66</td>
                </tr>
                <tr valign="top">
                  <td>Keep it Simple</td>
                  <td>187.59</td>
                  <td>13.78</td>
                </tr>
                <tr valign="top">
                  <td>Multilingual unsupervised sentence simplification</td>
                  <td>193.07</td>
                  <td>13.86</td>
                </tr>
                <tr valign="top">
                  <td>PEGASUS-large</td>
                  <td>272.04</td>
                  <td>13.93</td>
                </tr>
                <tr valign="top">
                  <td>PEGASUS-pubmed-large</td>
                  <td>150.00</td>
                  <td>15.09</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Comparison of Text Generated by all the models. The highlighted blue text indicates copying. CI: Confidence Interval;  FEV: Forced Expiratory Volume; N: Population size; PEV: Peak Expiratory Flow; RR: Respiratory Rate.</p>
            </caption>
            <graphic xlink:href="medinform_v10i11e38095_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Example of misinformation found in Generated text. CdLS: Cornelia de Lange syndrome; IVIg: Intravenous immune globulin;  MS: Multiple Sclerosis; PE: plasma exchange.</p>
            </caption>
            <graphic xlink:href="medinform_v10i11e38095_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Human Evaluations</title>
        <p>For this research, 3 domain experts assessed the quality of generated text, based on factors of INFO, FLU, COH, FAC, and ADE, as proposed by Yuan et al [<xref ref-type="bibr" rid="ref45">45</xref>], which are discussed in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. To measure interrater reliability, the percentage agreement between the annotators is calculated, and the results are shown in <xref ref-type="table" rid="table5">Table 5</xref>. The average percentage agreement for the factors of FLU, COH, FAC, and ADE is the highest, indicating that annotators agree in their evaluations.</p>
        <p>The average Likert score for each factor is also reported by each rater (<xref ref-type="table" rid="table6">Table 6</xref>). From the data mentioned in <xref ref-type="table" rid="table6">Table 6</xref>, the raters think that the COH and FLU have the highest quality, with the ADE, FAC, and INFO also rated reasonably high.</p>
        <p>To further assess whether results obtained by automated metrics truly signify an improvement in the quality of generated text by TESLEA, the Spearman rank correlation coefficient was calculated between human ratings and the automatic metrics for all 51 generated paragraphs (text), with the results shown in <xref ref-type="table" rid="table7">Table 7</xref>. The BARTScore has the highest correlation with human ratings for FLU, FAC, COH, and ADE compared with other metrics. A few text samples along with their human annotations and automated metric scores are shown in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> and <xref rid="figure7" ref-type="fig">Figure 7</xref>.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Average percentage interrater agreement.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="150"/>
            <col width="130"/>
            <col width="120"/>
            <col width="120"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Interrater agreement</td>
                <td>Informativeness, %</td>
                <td>Fluency, %</td>
                <td>Factuality, %</td>
                <td>Coherence, %</td>
                <td>Adequacy, %</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>A1<sup>a</sup> and A2<sup>b</sup></td>
                <td>82.35</td>
                <td>82.35</td>
                <td>82.35</td>
                <td>70.59</td>
                <td>82.35</td>
              </tr>
              <tr valign="top">
                <td>A1 and A3<sup>c</sup></td>
                <td>70.59</td>
                <td>58.82</td>
                <td>70.59</td>
                <td>70.59</td>
                <td>70.59</td>
              </tr>
              <tr valign="top">
                <td>A3 and A2</td>
                <td>52.94</td>
                <td>70.59</td>
                <td>74.51</td>
                <td>74.51</td>
                <td>64.71</td>
              </tr>
              <tr valign="top">
                <td>Average (% agreement)</td>
                <td>68.63</td>
                <td>70.59</td>
                <td>74.51</td>
                <td>74.51</td>
                <td>72.55</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>A1: annotator 1.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>A2: annotator 2.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>A3: annotator 3.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Average Likert score by each rater for informativeness, fluency, factuality, coherence, and adequacy.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="190"/>
            <col width="140"/>
            <col width="160"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Rater</td>
                <td>Informativeness</td>
                <td>Fluency</td>
                <td>Factuality</td>
                <td>Coherence</td>
                <td>Adequacy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>A1</td>
                <td>3.82</td>
                <td>4.12</td>
                <td>3.91</td>
                <td>3.97</td>
                <td>3.76</td>
              </tr>
              <tr valign="top">
                <td>A2</td>
                <td>3.50</td>
                <td>4.97</td>
                <td>3.59</td>
                <td>4.82</td>
                <td>3.68</td>
              </tr>
              <tr valign="top">
                <td>A3</td>
                <td>4.06</td>
                <td>3.94</td>
                <td>3.85</td>
                <td>3.94</td>
                <td>3.85</td>
              </tr>
              <tr valign="top">
                <td>Average Likert score</td>
                <td>3.79</td>
                <td>4.34</td>
                <td>3.78</td>
                <td>4.24</td>
                <td>3.76</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Spearman rank correlation coefficient between automatic metrics and human ratings for the text generated by TESLEA.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="150"/>
            <col width="160"/>
            <col width="130"/>
            <col width="160"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Metric</td>
                <td>Informativeness</td>
                <td>Fluency</td>
                <td>Factuality</td>
                <td>Coherence</td>
                <td>Adequacy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ROUGE-1</td>
                <td>0.18<sup>a</sup></td>
                <td>–0.04</td>
                <td>–0.01</td>
                <td>–0.05</td>
                <td>0.06</td>
              </tr>
              <tr valign="top">
                <td>ROUGE-2</td>
                <td>0.08</td>
                <td>–0.01</td>
                <td>–0.05</td>
                <td>–0.04</td>
                <td>0.05</td>
              </tr>
              <tr valign="top">
                <td>SARI</td>
                <td>0.09</td>
                <td>–0.66</td>
                <td>–0.13</td>
                <td>–0.01</td>
                <td>0.01</td>
              </tr>
              <tr valign="top">
                <td>BARTScore</td>
                <td>0.08</td>
                <td>0.32<sup>a</sup></td>
                <td>0.38<sup>a</sup></td>
                <td>0.22<sup>a</sup></td>
                <td>0.07<sup>a</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>Best result.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Samples of Complex, Simple (Gold) and generated medical paragraphs along with automated metrics and Human annotations.</p>
          </caption>
          <graphic xlink:href="medinform_v10i11e38095_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The most up-to-date research about biomedicine is often inaccessible to the general public due to the domain-specific medical terminology. A way to address this problem is by creating a system that converts complex medical information into a simpler form, thus making it accessible to everyone. In this study, a TS approach was developed that can automatically simplify complex medical paragraphs while maintaining the quality of the generated text. The proposed approach trains the transformer-based BART model to optimize rewards specific for TS, resulting in increased simplicity. The BART model is trained using the proposed RL method to optimize certain rewards that help generate simpler text while maintaining the quality of generated text. As a result, the trained model generates simplified text that reduces the complexity of the original text by 2-grade points, when measured using the FKGL [<xref ref-type="bibr" rid="ref29">29</xref>]. From the results obtained, it can be concluded that TESLEA is effective in generating simpler text compared with technical abstracts, the gold-standard references (ie, simple medical paragraphs corresponding to complex medical paragraphs), and the baseline models. Although previous work [<xref ref-type="bibr" rid="ref8">8</xref>] developed baseline models for this task, to the best of our knowledge, this is the first time RL is being applied to the field of medical TS. Moreover, previous studies failed to analyze the quality of the generated text, which this study measures via the factors of FLU, FAC, COH, ADE, and INFO. Manual evaluations of TESLEA-generated text were conducted with the help of domain experts using the aforesaid factors and further research was conducted to analyze which automatic metrics agree with manual annotations using the Spearman rank correlation coefficient. 
The analysis revealed that BARTScore [<xref ref-type="bibr" rid="ref45">45</xref>] best correlates with the human annotations when evaluated for a text generated by TESLEA, indicating that TESLEA learns to generate semantically relevant and fluent text, which conveys the essential information mentioned in the complex medical paragraph. These results suggest that (1) TESLEA can perform TS of medical paragraphs such that outputs are simple and maintain the quality, (2) the rewards optimized by TESLEA help the model capture syntactic and semantic information, increasing the FLU and COH of outputs, as witnessed when the outputs are evaluated by BARTScore and human annotators.</p>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <p>Although this research is a significant contribution to the literature on medical TS, the proposed approach does have a few limitations, which, if addressed, could result in even better outputs. TESLEA can generate simpler versions of the text, but in some instances, it induces misinformation, resulting in reduced FAC and INFO of the generated text. Therefore, there is a need to design rewards that consider the FAC and INFO of the generated text. We also plan to conduct extensive human evaluations on a large scale for the text generated by various models (eg, KIS, BART-UL) using domain experts (ie, physicians and medical students).</p>
        <p>Transformer-based language models are sensitive to the pretraining regime, so a possible next step is to pretrain a language model on domain-specific raw data sets such as PubMed [<xref ref-type="bibr" rid="ref40">40</xref>], which will help develop domain-specific vocabulary for the model. Including these strategies may help in increasing the simplicity of the generated text.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>The interest in and need for TS in the medical domain are growing as the quantity of data is continuously increasing. Automated systems, such as the one proposed in this paper, can dramatically increase accessibility to information for the general public. This work not only provides a technical solution for automated TS, but also lays out and addresses the challenges of evaluating the outputs of such systems, which can be highly subjective. It is the authors’ sincere hope that this work allows other researchers to build on and improve the quality of similar effort.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Training Procedures and Decoding Methods.</p>
        <media xlink:href="medinform_v10i11e38095_app1.docx" xlink:title="DOCX File , 129 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Hyperparameters and Evaluation Metrics.</p>
        <media xlink:href="medinform_v10i11e38095_app2.docx" xlink:title="DOCX File , 190 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Abbreviations and Examples.</p>
        <media xlink:href="medinform_v10i11e38095_app3.docx" xlink:title="DOCX File , 1060 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ARI</term>
          <def>
            <p>Automated Readability Index</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">FKGL</term>
          <def>
            <p>Flesch-Kincaid Grade Level</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">GPT</term>
          <def>
            <p>generative pretraining transformer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">MLE</term>
          <def>
            <p>maximum likelihood estimation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">KIS</term>
          <def>
            <p>Keep it Simple</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">Lml</term>
          <def>
            <p>maximum likelihood loss</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">LS</term>
          <def>
            <p>lexical simplification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">MUSS</term>
          <def>
            <p>multilingual unsupervised sentence simplification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">PLS</term>
          <def>
            <p>plain language summary</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">RFlesch</term>
          <def>
            <p>FKGL reward</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">RL</term>
          <def>
            <p>reinforcement learning</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank the research team at DaTALab, Lakehead University, for their support. The authors also thank Compute Canada for providing the computational resources without which this research would not have been possible. This research is funded by NSERC Discovery (RGPIN-2017-05377) held by Dr. Vijay Mago. The authors thank Mr. Aditya Singhal (MSc student at Lakehead University) for providing his feedback on the manuscript.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Minnen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pearce</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Canning</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tait</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Simplifying text for language-impaired readers</article-title>
          <year>1999</year>
          <conf-name>Ninth Conference of the European Chapter of the Association for Computational Linguistics</conf-name>
          <conf-date>June 8-12, 1999</conf-date>
          <conf-loc>Bergen, Norway</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>269</fpage>
          <lpage>270</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/E99-1042"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paetzold</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Specia</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Unsupervised Lexical Simplification for Non-Native Speakers</article-title>
          <source>AAAI</source>
          <year>2016</year>
          <month>03</month>
          <day>05</day>
          <volume>30</volume>
          <issue>1</issue>
          <fpage>3761</fpage>
          <lpage>3767</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ojs.aaai.org/index.php/AAAI/article/view/9885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1609/aaai.v30i1.9885</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ganitkevitch</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Van Durme</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Callison-Burch</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>PPDB: The paraphrase database</article-title>
          <source>Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2013</year>
          <month>6</month>
          <conf-name>The 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 9-12, 2013</conf-date>
          <conf-loc>Atlanta, GA</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>758</fpage>
          <lpage>764</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N13-1092"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/p15-2070</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rebecca Thomas</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>WordNet-Based Lexical Simplification of a Document</article-title>
          <source>Proceedings of the 11th Conference on Natural Language Processing (KONVENS 2012)</source>
          <year>2012</year>
          <conf-name>The 11th Conference on Natural Language Processing (KONVENS 2012)</conf-name>
          <conf-date>September 19-21, 2012</conf-date>
          <conf-loc>Vienna, Austria</conf-loc>
          <fpage>80</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/270450791_WordNet-Based_Lexical_Simplification_of_a_Document"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Lexical Simplification with Pretrained Encoders</article-title>
          <source>AAAI</source>
          <year>2020</year>
          <month>04</month>
          <day>03</day>
          <volume>34</volume>
          <issue>05</issue>
          <fpage>8649</fpage>
          <lpage>8656</lpage>
          <pub-id pub-id-type="doi">10.1609/aaai.v34i05.6389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Bernhard</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>A monolingual tree-based translation model for sentence simplification</article-title>
          <source>Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)</source>
          <year>2010</year>
          <conf-name>The 23rd International Conference on Computational Linguistics (Coling 2010)</conf-name>
          <conf-date>August 23-27, 2010</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <publisher-loc>Beijing, China</publisher-loc>
          <publisher-name>Coling 2010 Organizing Committee</publisher-name>
          <fpage>1353</fpage>
          <lpage>1361</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/C10-1152.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wubben</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>van den Bosch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krahmer</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Sentence simplification by monolingual machine translation</article-title>
          <source>Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</source>
          <year>2012</year>
          <conf-name>The 50th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 8-14, 2012</conf-date>
          <conf-loc>Jeju Island, Korea</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>1015</fpage>
          <lpage>1024</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P12-1107"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devaraj</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Paragraph-level Simplification of Medical Texts</article-title>
          <source>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2021</year>
          <month>6</month>
          <conf-name>The 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 6-11, 2021</conf-date>
          <conf-loc>Virtual</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>4972</fpage>
          <lpage>4984</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2021.naacl-main.395.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2021.naacl-main.395</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nisioi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Štajner</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Paolo Ponzetto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dinu</surname>
              <given-names>LP</given-names>
            </name>
          </person-group>
          <article-title>Exploring neural text simplification models</article-title>
          <source>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</source>
          <year>2017</year>
          <conf-name>The 55th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 30-August 4, 2017</conf-date>
          <conf-loc>Vancouver, BC</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>85</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P17-2014.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p17-2014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Malik</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Malik</surname>
              <given-names>GM</given-names>
            </name>
          </person-group>
          <article-title>Clinical Context-Aware Biomedical Text Summarization Using Deep Neural Network: Model Development and Validation</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>23</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e19810</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e19810/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19810</pub-id>
          <pub-id pub-id-type="medline">33095174</pub-id>
          <pub-id pub-id-type="pii">v22i10e19810</pub-id>
          <pub-id pub-id-type="pmcid">PMC7647812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lapata</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Sentence Simplification with Deep Reinforcement Learning</article-title>
          <source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2017</year>
          <conf-name>The 2017 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>September 7-11, 2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>584</fpage>
          <lpage>594</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D17-1062.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d17-1062</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Napoles</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlick</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Callison-Burch</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Optimizing Statistical Machine Translation for Text Simplification</article-title>
          <source>TACL</source>
          <year>2016</year>
          <month>12</month>
          <volume>4</volume>
          <fpage>401</fpage>
          <lpage>415</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00107</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>NIPS'17: Proceedings of the 31st International Conference on Neural Information Processing Systems</source>
          <year>2017</year>
          <conf-name>NIPS'17: The 31st International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA</conf-loc>
          <publisher-loc>Red Hook, NY</publisher-loc>
          <publisher-name>Curran Associates Inc</publisher-name>
          <fpage>6000</fpage>
          <lpage>6010</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ghazvininejad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mohamed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension</article-title>
          <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2020</year>
          <month>7</month>
          <conf-name>The 58th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 5-10, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>7871</fpage>
          <lpage>7880</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.acl-main.703.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Narasimhan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salimans</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Improving language understanding by generative pre-training</article-title>
          <source>Amazon AWS</source>
          <year>2022</year>
          <access-date>2022-10-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf">https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raffel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Narang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matena</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2020</year>
          <volume>21</volume>
          <fpage>1</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jmlr.org/papers/v21/20-074.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>de la Clergerie</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bordes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sagot</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>MUSS: multilingual unsupervised sentence simplification by mining paraphrases</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on April 16, 2021</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2005.00352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wenzek</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lachaux</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Conneau</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhary</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Guzmán</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>CCNet: Extracting High Quality Monolingual Datasets from Web Crawl Data</article-title>
          <source>Proceedings of the Twelfth Language Resources and Evaluation Conference</source>
          <year>2020</year>
          <conf-name>LREC 2020: The 12th Conference on Language Resources and Evaluation</conf-name>
          <conf-date>May 11-16, 2020</conf-date>
          <conf-loc>Marseille, France</conf-loc>
          <publisher-name>European Language Resources Association</publisher-name>
          <fpage>4003</fpage>
          <lpage>4012</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.lrec-1.494"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Semi-Supervised Text Simplification with Back-Translation and Asymmetric Denoising Autoencoders</article-title>
          <source>AAAI</source>
          <year>2020</year>
          <month>04</month>
          <day>03</day>
          <volume>34</volume>
          <issue>05</issue>
          <fpage>9668</fpage>
          <lpage>9675</lpage>
          <pub-id pub-id-type="doi">10.1609/aaai.v34i05.6515</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Surya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mishra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Laha</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sankaranarayanan</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Unsupervised Neural Text Simplification</article-title>
          <source>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2019</year>
          <conf-name>The 57th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 28-August 2, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>2058</fpage>
          <lpage>2068</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P19-1198.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p19-1198</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Document-Level Text Simplification: Dataset, Criteria and Baseline</article-title>
          <source>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2021</year>
          <conf-name>The 2021 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>November 7-11, 2021</conf-date>
          <conf-loc>Online and Punta Cana, Dominican Republic</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>7997</fpage>
          <lpage>8013</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2021.emnlp-main.630.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2021.emnlp-main.630</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coster</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kauchak</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Simple English Wikipedia: a new text simplification task</article-title>
          <source>Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2011</year>
          <conf-name>The 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 19-24, 2011</conf-date>
          <conf-loc>Portland, OR</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>665</fpage>
          <lpage>669</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P11-2117.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Maddela</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Neural CRF Model for Sentence Alignment in Text Simplification</article-title>
          <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2020</year>
          <month>7</month>
          <conf-name>The 58th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 5-10, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>7943</fpage>
          <lpage>7960</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.acl-main.709.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.709</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Callison-Burch</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Napoles</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Problems in Current Text Simplification Research: New Data Can Help</article-title>
          <source>TACL</source>
          <year>2015</year>
          <month>12</month>
          <volume>3</volume>
          <fpage>283</fpage>
          <lpage>297</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00139</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bjerva</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bos</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van der Goot</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nissim</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The Meaning Factory: Formal Semantics for Recognizing Textual Entailment and Determining Semantic Similarity</article-title>
          <source>Proceedings of the 8th International Workshop on Semantic Evaluation (SemEval 2014)</source>
          <year>2014</year>
          <conf-name>The 8th International Workshop on Semantic Evaluation (SemEval 2014)</conf-name>
          <conf-date>August 23-24, 2014</conf-date>
          <conf-loc>Dublin, Ireland</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>642</fpage>
          <lpage>646</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/S14-2114.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/s14-2114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Laban</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Schnabel</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hearst</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Keep It Simple: Unsupervised Simplification of Multi-Paragraph Text</article-title>
          <source>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</source>
          <year>2021</year>
          <conf-name>The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing</conf-name>
          <conf-date>August 1-6, 2021</conf-date>
          <conf-loc>Online</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>6365</fpage>
          <lpage>6378</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2021.acl-long.498.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.498</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van den Bercken</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sips</surname>
              <given-names>R-J</given-names>
            </name>
            <name name-style="western">
              <surname>Lofi</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Evaluating neural text simplification in the medical domain</article-title>
          <year>2019</year>
          <month>5</month>
          <conf-name>WWW '19: The World Wide Web Conference</conf-name>
          <conf-date>May 13-17, 2019</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Association for Computing Machinery (ACM)</publisher-name>
          <fpage>3286</fpage>
          <lpage>3292</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/3308558.3313630"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3308558.3313630</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>Dataset</article-title>
          <source>Github</source>
          <access-date>2022-10-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/AshOlogn/Paragraph-level-Simplification-of-Medical-Texts">https://github.com/AshOlogn/Paragraph-level-Simplification-of-Medical-Texts</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kincaid</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Fishburne</surname>
              <given-names>RP</given-names>
              <suffix>Jr</suffix>
            </name>
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Chissom</surname>
              <given-names>BS</given-names>
            </name>
          </person-group>
          <article-title>Derivation of new readability formulas (automated readability index, fog count and Flesch reading ease formula) for navy enlisted personnel</article-title>
          <source>Naval Technical Training Command Millington TN Research Branch</source>
          <year>1975</year>
          <month>2</month>
          <day>1</day>
          <access-date>2022-10-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.dtic.mil/sti/citations/ADA006655">https://apps.dtic.mil/sti/citations/ADA006655</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Papineni</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Roukos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Bleu: a method for automatic evaluation of machine translation</article-title>
          <source>Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2002</year>
          <conf-name>The 40th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 7-12, 2002</conf-date>
          <conf-loc>Philadelphia, PA</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>311</fpage>
          <lpage>318</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P02-1040.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/1073083.1073135</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>BioSentVec: creating sentence embeddings for biomedical texts</article-title>
          <year>2019</year>
          <conf-name>2019 IEEE International Conference on Healthcare Informatics (ICHI)</conf-name>
          <conf-date>June 10-13, 2019</conf-date>
          <conf-loc>Xi'an, China</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>IEEE</publisher-name>
          <fpage>1</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/8904728"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/ICHI.2019.8904728</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breland</surname>
              <given-names>HM</given-names>
            </name>
          </person-group>
          <article-title>Word Frequency and Word Difficulty: A Comparison of Counts in Four Corpora</article-title>
          <source>Psychol Sci</source>
          <year>1996</year>
          <month>05</month>
          <day>06</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>96</fpage>
          <lpage>99</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://paperpile.com/b/wPHHu8/3EP0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1467-9280.1996.tb00336.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Narayan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Lapata</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Don’t Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization</article-title>
          <source>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2018</year>
          <conf-name>The 2018 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>October 31-November 4, 2018</conf-date>
          <conf-loc>Brussels, Belgium</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>1797</fpage>
          <lpage>1807</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D18-1206.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d18-1206</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nallapati</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>dos Santos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gülçehre</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Abstractive Text Summarization using Sequence-to-sequence RNNs and Beyond</article-title>
          <source>Proceedings of the 20th SIGNLL Conference on Computational Natural Language Learning</source>
          <year>2016</year>
          <month>8</month>
          <conf-name>The 20th SIGNLL Conference on Computational Natural Language Learning</conf-name>
          <conf-date>August 7-12, 2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>280</fpage>
          <lpage>290</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/K16-1028.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/k16-1028</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pretraining</article-title>
          <source>Findings of the Association for Computational Linguistics, EMNLP 2020</source>
          <year>2020</year>
          <conf-name>EMNLP 2020</conf-name>
          <conf-date>November 16-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>2401</fpage>
          <lpage>2410</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.findings-emnlp.217.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ranzato</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chopra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Auli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zaremba</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Sequence Level Training with Recurrent Neural Networks</article-title>
          <source>arXiv</source>
          <comment>
            Preprint posted online on May 6, 2016. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1511.06732"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aghajanyan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shrivastava</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Better Fine-Tuning by Reducing Representational Collapse</article-title>
          <year>2021</year>
          <month>5</month>
          <conf-name>International Conference on Learning Representations (ICLR 2021)</conf-name>
          <conf-date>May 3-7, 2021</conf-date>
          <conf-loc>Virtual</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/343547031_Better_Fine-Tuning_by_Reducing_Representational_Collapse"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Simple statistical gradient-following algorithms for connectionist reinforcement learning</article-title>
          <source>Mach Learn</source>
          <year>1992</year>
          <month>5</month>
          <volume>8</volume>
          <issue>3-4</issue>
          <fpage>229</fpage>
          <lpage>256</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://link.springer.com/article/10.1007/BF00992696"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/BF00992696</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rennie</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marcheret</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mroueh</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Goel</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Self-Critical Sequence Training for Image Captioning</article-title>
          <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</source>
          <year>2017</year>
          <month>7</month>
          <conf-name>2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>
          <conf-date>July 21-26, 2017</conf-date>
          <conf-loc>Honolulu, HI</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>IEEE</publisher-name>
          <fpage>7008</fpage>
          <lpage>7024</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openaccess.thecvf.com/content_cvpr_2017/papers/Rennie_Self-Critical_Sequence_Training_CVPR_2017_paper.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/CVPR.2017.131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Clinical Text Data in Machine Learning: Systematic Review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>03</month>
          <day>31</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>e17984</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/3/e17984/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17984</pub-id>
          <pub-id pub-id-type="medline">32229465</pub-id>
          <pub-id pub-id-type="pii">v8i3e17984</pub-id>
          <pub-id pub-id-type="pmcid">PMC7157505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>de la Clergerie</surname>
              <given-names>É</given-names>
            </name>
            <name name-style="western">
              <surname>Sagot</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bordes</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Controllable Sentence Simplification</article-title>
          <source>Proceedings of the 12th Language Resources and Evaluation Conference</source>
          <year>2020</year>
          <month>05</month>
          <day>11</day>
          <conf-name>12th Language Resources and Evaluation Conference (LREC 2020)</conf-name>
          <conf-date>May 11-16, 2020</conf-date>
          <conf-loc>Marseille, France</conf-loc>
          <fpage>4689</fpage>
          <lpage>4698</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.lrec-1.577/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bhendawade</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>FastSeq: Make Sequence Generation Faster</article-title>
          <year>2021</year>
          <month>08</month>
          <day>01</day>
          <conf-name>59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations</conf-name>
          <conf-date>August 1-6, 2021</conf-date>
          <conf-loc>Online</conf-loc>
          <fpage>218</fpage>
          <lpage>226</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2021.acl-demo.26/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2021.acl-demo.26</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paulus</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A Deep Reinforced Model for Abstractive Summarization</article-title>
          <year>2018</year>
          <conf-name>International Conference on Learning Representations (ICLR 2018)</conf-name>
          <conf-date>April 30 to May 3, 2018</conf-date>
          <conf-loc>Vancouver, BC</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/316875315_A_Deep_Reinforced_Model_for_Abstractive_Summarization"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C-Y</given-names>
            </name>
          </person-group>
          <article-title>ROUGE: A Package for Automatic Evaluation of Summaries</article-title>
          <year>2004</year>
          <conf-name>Text Summarization Branches Out</conf-name>
          <conf-date>July 25-26, 2004</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <publisher-loc>New Brunswick, NJ</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>74</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/W04-1013.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Neubig</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>BARTScore: Evaluating Generated Text as Text Generation</article-title>
          <year>2021</year>
          <month>05</month>
          <day>21</day>
          <conf-name>Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021</conf-name>
          <conf-date>December 6-14, 2021</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>27263</fpage>
          <lpage>27277</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2021/hash/e4d2b6e6fdeca3e60e0f1a62fee3d9dd-Abstract.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Saleh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Pegasus: Pre-training with extracted gap-sentences for abstractive summarization</article-title>
          <year>2020</year>
          <month>07</month>
          <day>13</day>
          <conf-name>37th International Conference on Machine Learning (ICML 2020)</conf-name>
          <conf-date>July 13-18, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://proceedings.mlr.press/v119/zhang20ae"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loshchilov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Hutter</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Decoupled Weight Decay Regularization</article-title>
          <year>2019</year>
          <month>05</month>
          <day>06</day>
          <conf-name>International Conference on Learning Representations (ICLR 2019)</conf-name>
          <conf-date>May 6-9, 2019</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
