<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i11e26065</article-id>
      <article-id pub-id-type="pmid">34842547</article-id>
      <article-id pub-id-type="doi">10.2196/26065</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Active Annotation in Evaluating the Credibility of Web-Based Medical Information: Guidelines for Creating Training Data Sets for Machine Learning</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Xu</surname>
            <given-names>Wei</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jordan-Marsh</surname>
            <given-names>Maryalice</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hidki</surname>
            <given-names>Asmaa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Nagavally</surname>
            <given-names>Sneha</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Nabożny</surname>
            <given-names>Aleksandra</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Software Engineering</institution>
            <institution>Gdańsk University of Technology</institution>
            <addr-line>11/12 Gabriela Narutowicza St</addr-line>
            <addr-line>Gdańsk, 80-233</addr-line>
            <country>Poland</country>
            <phone>48 602327778</phone>
            <email>aleksandra.nabozny@pja.edu.pl</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9534-142X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Balcerzak</surname>
            <given-names>Bartłomiej</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0881-5362</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wierzbicki</surname>
            <given-names>Adam</given-names>
          </name>
          <degrees>Prof Dr Hab</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0075-7030</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Morzy</surname>
            <given-names>Mikołaj</given-names>
          </name>
          <degrees>PhD, DSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2905-9538</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Chlabicz</surname>
            <given-names>Małgorzata</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5113-5672</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Software Engineering</institution>
        <institution>Gdańsk University of Technology</institution>
        <addr-line>Gdańsk</addr-line>
        <country>Poland</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Polish-Japanese Academy of Information Technology</institution>
        <addr-line>Warsaw</addr-line>
        <country>Poland</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Faculty of Computing and Telecommunications</institution>
        <institution>Poznan University of Technology</institution>
        <addr-line>Poznań</addr-line>
        <country>Poland</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Population Medicine and Lifestyle Diseases Prevention</institution>
        <institution>Medical University of Białystok</institution>
        <addr-line>Białystok</addr-line>
        <country>Poland</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Aleksandra Nabożny <email>aleksandra.nabozny@pja.edu.pl</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>26</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>11</issue>
      <elocation-id>e26065</elocation-id>
      <history>
        <date date-type="received">
          <day>26</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>9</day>
          <month>2</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>24</day>
          <month>9</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Aleksandra Nabożny, Bartłomiej Balcerzak, Adam Wierzbicki, Mikołaj Morzy, Małgorzata Chlabicz. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 26.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/11/e26065" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The spread of false medical information on the web is rapidly accelerating. Establishing the credibility of web-based medical information has become a pressing necessity. Machine learning offers a solution that, when properly deployed, can be an effective tool in fighting medical misinformation on the web.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study is to present a comprehensive framework for designing and curating machine learning training data sets for web-based medical information credibility assessment. We show how to construct the annotation process. Our main objective is to support researchers from the medical and computer science communities. We offer guidelines on the preparation of data sets for machine learning models that can fight medical misinformation.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We begin by providing the annotation protocol for medical experts involved in medical sentence credibility evaluation. The protocol is based on a qualitative study of our experimental data. To address the problem of insufficient initial labels, we propose a preprocessing pipeline for the batch of sentences to be assessed. It consists of representation learning, clustering, and reranking. We call this process active annotation.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We collected more than 10,000 annotations of statements related to selected medical subjects (psychiatry, cholesterol, autism, antibiotics, vaccines, steroids, birth methods, and food allergy testing) for less than US $7000 by employing 9 highly qualified annotators (certified medical professionals), and we release this data set to the general public. We developed an active annotation framework for more efficient annotation of noncredible medical statements. The application of qualitative analysis resulted in a better annotation protocol for our future efforts in data set creation.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The results of the qualitative analysis support our claims of the efficacy of the presented method.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>active annotation</kwd>
        <kwd>credibility</kwd>
        <kwd>web-based medical information</kwd>
        <kwd>fake news</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In 2020 and 2021, the world has not been fighting only a pandemic; more precisely, it has been fighting both a pandemic and an infodemic [<xref ref-type="bibr" rid="ref1">1</xref>]. The spread of COVID-19 has been accompanied by an equally unfortunate and dangerous spread of misinformation such as fake news linking the COVID-19 epidemic to 5G technology [<xref ref-type="bibr" rid="ref2">2</xref>]. Disinformation has influenced other disease outbreaks such as the measles outbreak in Germany that involved more than 570 reported measles cases and caused infant deaths [<xref ref-type="bibr" rid="ref3">3</xref>]. This study suggests that there exist numerous similar examples. From anticholesterol treatment to psychiatry—potentially harmful noncredible medical content on varied topics proliferates on the web.</p>
        <p>Web-based information related to health and medicine is a large and influential category of web content, to the extent that the term <italic>Dr Google</italic> has been coined. The case of health-related web content is interesting from the point of view of informatics not only because medical information is highly specialized and written using domain-specific vocabulary, but also because medical information on the web is often misinterpreted or taken out of context. Health-related fake news reports often rely on factually correct medical statements such as the antiseptic effect of silver ions, which translates into a false belief in the universal effectiveness of colloidal silver for treating any disease. Debunking health-related web content requires not only expertise but also awareness of the possible effects of misinterpreted information. The breadth of specialized medical knowledge, coupled with the impact of context on fake news debunking, increases the difficulty of the problem of medical fake news detection.</p>
        <p>Fully automated methods are currently not mature enough to detect medical fake news with sufficient accuracy. A realistic system for detecting and debunking medical fake news needs to keep medical experts in the loop. However, such an approach is not scalable because medical experts and health professionals cannot allocate sufficient time to handle the volume of misinformation spreading on the web. Another issue is that, in general, compared with credible medical content, noncredible medical web content is sparse. Assuming a real human–assisted system for assessing the credibility of medical statements, statistically, out of 100 assessed statements, the expert will catch no more than 20 unreliable items (as shown by our data collection experiment). The purpose of our work is to create an automatic tool to maximize the number of potentially noncredible sentences to be verified in the first place. The sentences are then reordered so that the most noncredible content shows up first to be annotated by a human judge. In such a way, we can optimize medical experts’ time and efficacy when annotating medical information. Even if only a portion of potentially noncredible sentences gets annotated by the expert, it will include the most suspicious content.</p>
        <p>We propose to use a method called active annotation. It dramatically improves the use of annotators’ time. Active annotation implements a highly efficient human-in-the-loop component for augmented text annotation. The main idea behind active annotation is to use an unsupervised machine learning method (grouping of sentences into clusters based on sentence similarity) to organize the training data to suggest annotation labels for human annotators. When active annotation is used, the work of human annotators (medical experts) is focused on difficult noncredible medical statements. In addition, because the annotators process clusters of semantically similar sentences, our method significantly reduces the cost of cognitively expensive context switching. However, it is the annotators who decide the final labeling of the data.</p>
        <p>The method proposed in this paper extends currently known active annotation methods by a cluster-ranking procedure that ensures that medical experts first see the content clusters that are most likely to contain noncredible content. This approach allows us to speed up the discovery of noncredible content. In our view, the process of detecting and debunking medical misinformation will never stop, and therefore a method that optimizes the use of medical experts’ time is of essential importance.</p>
        <p>To test our method, we conducted an experiment with the participation of medical experts. They were asked to evaluate the credibility of medical and health-related Web content. The result of the experiment is a large data set that contains numerous examples of medical misinformation. We conducted an explorative and qualitative analysis of this data set, searching for patterns of similarity among the different examples of medical misinformation. The result of this analysis (which included an in-depth case study of misinformation related to cholesterol therapy with statins) was the discovery of distinct narratives of medical misinformation. We believe that these narratives are general in nature and will be of great use for detecting medical misinformation in the future.</p>
        <p>Our direct experiences with the annotation team dictate a set of rules that have been formalized as a strict protocol for medical text annotation. Most importantly, we noted that the annotators tended to use external contexts extensively when annotating data. This, in turn, led to incoherent annotation labels across the data set and a divergence between the notions of statement credibility and statement truthfulness. We share our experience and present an annotation protocol that we have used to mitigate some of the annotation problems.</p>
        <p>The original contributions presented in our paper are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>An annotation schema, an annotation protocol, and a unique annotated data set comprising 10,000 sentences taken from web-based content on medical issues, labeled by medical experts as credible, noncredible, or neutral. The entire data set is available in a public repository [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
          </list-item>
          <list-item>
            <p>A method for ranking sentences submitted to medical experts for labeling. Our active annotation method increases the likelihood that medical experts will discover noncredible sentences and thus optimizes the use of medical experts’ time.</p>
          </list-item>
          <list-item>
            <p>A qualitative analysis of the labeled data set. We discovered 4 distinct narratives (both syntactic and semantic) present in the noncredible statements. We believe that these narratives can be further used to discern noncredible statements in areas of medicine other than the areas covered by our data set.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Literature Review</title>
        <p>Health literacy is a rising concern, especially during the COVID-19 pandemic. However, research shows that more than half of the population struggles with making proper judgments and taking decisions in everyday life concerning their health [<xref ref-type="bibr" rid="ref5">5</xref>]. Moreover, studies from the United States, Europe, and Australia [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] have found that web-based health information is written above the average reading level of adults. There is clearly the need for external tools or strategies to support laypersons in assessing the credibility of web-based health information. Expert fact-checking is one of the proposed strategies [<xref ref-type="bibr" rid="ref8">8</xref>] because short-format refutational medical expert fact-checks have proven to be free from the <italic>backfire effect</italic> [<xref ref-type="bibr" rid="ref9">9</xref>] (the <italic>backfire effect</italic> has been described in the study by Nyhan and Reifler [<xref ref-type="bibr" rid="ref10">10</xref>]). Research shows that using expert sources to correct health misinformation in social media permanently corrects users’ false beliefs.</p>
        <p>The related work on the general news media domain [<xref ref-type="bibr" rid="ref11">11</xref>] demonstrates that a credible source can promote false information and vice versa. Technological innovation in the fight against disinformation, as the authors argue, should go beyond discrediting noncredible sources of information and should instead promote more careful information consumption [<xref ref-type="bibr" rid="ref11">11</xref>]. The literature has reported on successful machine learning models that classify entire articles or information sources [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Of note, these models can easily overfit (ie, obtain high classification accuracy for publications from media outlets present in the training set but fail to generalize to previously unseen media outlets). The possible performance drop in classifying fake news from previously unseen sources has been examined in the literature [<xref ref-type="bibr" rid="ref12">12</xref>]. The study by Afsana et al [<xref ref-type="bibr" rid="ref14">14</xref>] presents, to the best of our knowledge, the most accurate classification model for assessing the quality of web-based health information. The authors report accuracy ranging from 84% to 90% across 10 criteria. The model includes source-level and article-level features. The relationship of the described criteria with credibility remains an open research question.</p>
        <p>The assessment of the veracity of individual claims contained in open-domain news articles is an emerging and fast-growing field of research. The scope of activities includes the creation of data sets containing the claims collected from fact-checking websites, such as MultiFC [<xref ref-type="bibr" rid="ref15">15</xref>], Liar [<xref ref-type="bibr" rid="ref16">16</xref>], and Truth of Varying Shades [<xref ref-type="bibr" rid="ref17">17</xref>], and the existing solutions are based on a variety of approaches, from semi-automatic knowledge graph creation [<xref ref-type="bibr" rid="ref18">18</xref>] to choosing check-worthy claims and comparing them against verified content (ClaimBuster) [<xref ref-type="bibr" rid="ref19">19</xref>]. The open-domain solutions or solutions used in journalism [<xref ref-type="bibr" rid="ref20">20</xref>] are not easily transferable to the medical domain.</p>
        <p>The MedFact system [<xref ref-type="bibr" rid="ref21">21</xref>] is a stand-alone web-based health information consumption support system. In MedFact, the user is automatically equipped with relevant trusted sources during web-based discussions.</p>
        <p>State-of-the-art information retrieval models [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>] form part of the fully and semi-automatic fact-checking systems. A combination of such systems’ judgments and human judgments has been successfully applied in the study by Ghenai and Mejova [<xref ref-type="bibr" rid="ref24">24</xref>] for the specific case of capturing the spread of rumors regarding the Zika virus. Our goal is to test the combination of an unsupervised machine learning model with a human-in-the-loop approach as a robust tool to support the assessment of the credibility of web-based medical statements.</p>
        <p>The quality assessment coding scheme for lay medical articles had been proposed in the 1990s under the Discern handbook project [<xref ref-type="bibr" rid="ref25">25</xref>] and as Health on the Net (HON) principles. However, the guidelines have to comply with the rapidly evolving web-based reality; thus, new tools and updates are designed every few years, such as the Ensuring Quality Information for Patients (2004) [<xref ref-type="bibr" rid="ref26">26</xref>] tool, Evidence-Based Patient Information (2010) [<xref ref-type="bibr" rid="ref27">27</xref>], and Good Practice Guidelines for Health Information (2016) [<xref ref-type="bibr" rid="ref28">28</xref>], to name a few. Keselman et al [<xref ref-type="bibr" rid="ref29">29</xref>] propose different credibility assessment criteria based on 25 web-based articles regarding type 2 diabetes. These criteria (objectivity, emotional appeal, promises, and certainty) can be automatically captured by language models and lexicon-based machine learning. Work on web-based journalism has developed good practices that can also be used by medical experts in credibility evaluation. Medical practitioners who directly communicate medical information to patients can observe their reactions and subsequent actions and therefore have a special agency in credibility evaluation.</p>
        <p>Successful application of machine learning models requires the annotation of vast corpora of medical information. However, this annotation is prohibitively expensive given the required expertise of the annotators and their limited capacity. Active annotation is a technique that facilitates large-scale data annotation by providing an auxiliary ranking of sentences that should be manually annotated by medical experts and by delegating the labeling of other sentences to the underlying machine learning model. In this study, we are particularly inspired by the approach presented by Marinelli et al [<xref ref-type="bibr" rid="ref30">30</xref>]. The authors propose initially dividing text documents into separate clusters, selecting pivot documents (k-closest documents to the center of each cluster), and generating a tentative label for the cluster. Next, a small set of text documents is selected and presented to human annotators with a proposed label and a binary annotation decision (to accept or reject the label). The authors claim that in many applications, obtaining a full annotation schema before annotation may be difficult and turning the annotation task into a binary question–answering task significantly speeds up the process [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      </sec>
      <sec>
        <title>Language Modeling</title>
        <p>The term <italic>language model</italic> is confusing because it serves as an umbrella term for different concepts. As a general rule, a language model is a way in which textual content (tokens, words, sentences, paragraphs, and documents) is represented. Historically, text documents have been represented using 2 prevalent models: the bag-of-words model (where a document is represented simply as the set of words appearing in the document) and the one-hot encoding model (where a document is represented by a binary vector of a length equal to the size of the vocabulary and each position in the vector encodes the presence or absence of a word in the document). The most consequential limitation of these models was the inability to capture the semantic similarity between words. For instance, if a document contained the word <italic>diabetes</italic> and another document contained the word <italic>insulin</italic>, there was no straightforward way of deciding that the documents shared a common topic. This limitation has been abruptly neutralized with the advent of word embeddings. Word embeddings are dense continuous vector representations of words from a given vocabulary, which means that each word is assigned a unique vector whose elements are arbitrary numbers. Unlike one-hot encoding vectors where each vector has a length equal to the size of the vocabulary, word embedding vectors have, at most, several hundred dimensions. The vectors are trained on the text corpus to capture various semantic relationships among words. For instance, words such as <italic>apple</italic>, <italic>pear,</italic> and <italic>orange</italic> appear close to each other in the vector space because part of their representation encodes the notion of being a fruit. 
Analogously, the distance between the words <italic>Russia</italic> and <italic>Moscow</italic> is similar to the distance between the words <italic>Great Britain</italic> and <italic>London</italic> because the difference between the respective word vectors encodes the notion of a capital city.</p>
        <p>Since the seminal work of Mikolov et al [<xref ref-type="bibr" rid="ref31">31</xref>], word embeddings have revolutionized the field of natural language processing. After the initial success of the <italic>word2vec</italic> algorithm, numerous alternatives have been introduced: Global Vector embeddings trained through matrix factorization [<xref ref-type="bibr" rid="ref32">32</xref>], embeddings trained on sentence dependency parse trees [<xref ref-type="bibr" rid="ref33">33</xref>], embeddings in the hyperbolic space [<xref ref-type="bibr" rid="ref34">34</xref>], subword embeddings [<xref ref-type="bibr" rid="ref35">35</xref>], and many more. The common feature of these embeddings is the static assignment of dense vector representations to words. Each word receives the same embedding vector, irrespective of the context in which the word appears in a sentence. These static embeddings can be used to create representations for larger text units such as sentences, paragraphs, and documents. However, static embeddings are inherently unable to capture the intricacies hidden in the structure of the language and encoded in the context in which each word appears. Consider these 2 sentences: “A photo reveals significant damage to the tissue” and “Please do not throw used tissues into the toilet.” The word <italic>tissue</italic> will receive the same vector although the context allows disambiguation of the meaning of the word.</p>
        <p>To mitigate this limitation, modern language models depend on deep neural network architectures to calculate accurate, context-dependent word and sentence embeddings. First, context-dependent language models used either the long short-term memory network architecture [<xref ref-type="bibr" rid="ref36">36</xref>] or gated recurrent unit networks [<xref ref-type="bibr" rid="ref37">37</xref>] to capture contextual dependencies among the words appearing in a sentence. In other words, unlike static word embeddings, context-dependent language models calculate an embedding word vector based on the context (ie, words surrounding the embedded words). In the aforementioned example, the word <italic>tissue</italic> would receive 2 different vector representations: in the first sentence, the vector for the word <italic>tissue</italic> would be much closer to the vectors of words such as <italic>skin</italic> or <italic>cell</italic>; in the second sentence, the vector for the word <italic>tissue</italic> would be closer to the vector of the word <italic>handkerchief</italic>. These early recurrent architectures, however, suffered from performance drawbacks, and in 2018 they were replaced by the transformer architecture [<xref ref-type="bibr" rid="ref38">38</xref>]. This architecture allowed the training of much better embeddings, such as Google’s Universal Sentence Encoder [<xref ref-type="bibr" rid="ref39">39</xref>] or the (infamous) Generative Pre-trained Transformer 3 [<xref ref-type="bibr" rid="ref40">40</xref>].</p>
        <p>The current state-of-the-art language model, Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref41">41</xref>], produces continuous word vector representations by training the neural network using 2 parallel objectives: guessing the masked word in a sentence (ie, trying to predict the word based on the context) and deciding whether 2 sentences appear one after another. Given such training objectives, the network applies similar weights to the nodes regarding input words that appear in a similar context. Sentence-BERT (sBERT) [<xref ref-type="bibr" rid="ref42">42</xref>] is a straightforward extension of the original BERT architecture for creating sentence embeddings. This model is based on Siamese BERT networks [<xref ref-type="bibr" rid="ref43">43</xref>] (2 identical models trained simultaneously) that are fine-tuned on the Natural Language Inference and Semantic Textual Similarity tasks. The model serves as an encoder for sentences. The encoder calculates vector representations of sentences so that semantically similar sentences have low cosine distance in the latent embedding space. This is both more efficient and produces semantically richer sentence representations than simply averaging the vectors of words that appear in each sentence.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Presentation of 3 Steps</title>
        <p>To validate the efficacy of the active annotation approach, we need to create a data set of sentences on medical topics gathered from the Web, after which we need to obtain credibility evaluations of these sentences from medical experts. We need to propose methods for selecting sentences from the Web, having these sentences annotated by medical experts, and organizing these sentences into a processing pipeline to use the experts’ time and attention most efficiently. We elaborate on these 3 steps in this section.</p>
      </sec>
      <sec>
        <title>Data Selection</title>
        <p>We performed annotation on a data set of 247 articles collected manually from various eHealth websites. The data set consists of more than 10,000 sentences. All documents were annotated by medical professionals sentence by sentence. The sentences constitute a stratified sample of source texts of varying credibility. We first discussed the most problematic topics of specific medical fields with the medical practitioners. Next, we manually searched for articles that presented contradicting views regarding these topics. These topics include the following:</p>
        <list list-type="order">
          <list-item>
            <p>Pediatrics:</p>
            <list>
              <list-item>
                <p>Children’s antibiotics consumption (432 sentences)</p>
              </list-item>
              <list-item>
                <p>Children’s steroids consumption (701 sentences)</p>
              </list-item>
              <list-item>
                <p>Vaccination (1262 sentences)</p>
              </list-item>
              <list-item>
                <p>Dietary interventions for children with autism (431 sentences)</p>
              </list-item>
              <list-item>
                <p>Food allergy testing (1401 sentences)</p>
              </list-item>
            </list>
          </list-item>
          <list-item>
            <p>Psychiatry:</p>
            <list>
              <list-item>
                <p>Effectiveness of psychiatric medication and electroconvulsive therapy (2272 sentences)</p>
              </list-item>
            </list>
          </list-item>
          <list-item>
            <p>Cardiology:</p>
            <list>
              <list-item>
                <p>Benefits of statin therapy in treating cardiovascular disease (CVD; 2029 sentences)</p>
              </list-item>
              <list-item>
                <p>Dietary interventions for heart health improvement (423 sentences)</p>
              </list-item>
              <list-item>
                <p>Benefits of consumption of antioxidants (694 sentences)</p>
              </list-item>
            </list>
          </list-item>
          <list-item>
            <p>Gynecology:</p>
            <list>
              <list-item>
                <p>Benefits of cesarean section over natural birth (359 sentences)</p>
              </list-item>
              <list-item>
                <p>Selective serotonin reuptake inhibitor consumption during pregnancy (169 sentences)</p>
              </list-item>
              <list-item>
                <p>Aspirin consumption during pregnancy (257 sentences)</p>
              </list-item>
            </list>
          </list-item>
        </list>
        <p>Our collection of web-based health-related and medical articles reflects topics potentially causing controversy and misinformation among patients.</p>
      </sec>
      <sec>
        <title>Methodology of Selecting Source Websites</title>
        <p>The source websites were selected as follows. First, we asked each medical practitioner 2 questions:</p>
        <list list-type="order">
          <list-item>
            <p>“In your medical practice, what kind of false beliefs and rumors do you encounter when interacting with patients?”</p>
          </list-item>
          <list-item>
            <p>“The truthfulness of which facts do you have to prove to your patients most often?”</p>
          </list-item>
        </list>
        <p>The answers to these questions served as the basis for manually creating web queries. To create a data set of web medical articles addressed to laypersons, we submitted these queries to the Google search engine and then manually selected sources. The full list of these queries is listed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The manual collection was supported by the HON browser plugin (HON tag–certified webpages). As a result, 12.6% (31/247) of the extracted articles originated from HON-certified sources. The remaining 87.4% (216/247) come from domains such as the following:</p>
        <list list-type="bullet">
          <list-item>
            <p>Large news media outlets (eg, <italic>The Guardian</italic>, <italic>The New York Times</italic>, and BBC)</p>
          </list-item>
          <list-item>
            <p>Q&#38;A forums, both general and topic-specific (eg, “Quora”, “Yahoo”, “community.babycenter.com”)</p>
          </list-item>
          <list-item>
            <p>Parenting blogs (eg, “scarymommy.com”)</p>
          </list-item>
          <list-item>
            <p>Uncertified health portals (eg, “choosingwisely.org”, “practo.com”, and “heartuk.org.uk”)</p>
          </list-item>
          <list-item>
            <p>Advertising websites for medical supplements and medical testing (eg, “everlywell.com”, “yorktest.com”, and “naturesbest.co.uk/antioxidants”)</p>
          </list-item>
        </list>
        <p>The full list of data sources is available in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        <p>In this study, we consider a sentence as the unit of consistent information that undergoes credibility assessment. According to Wikipedia [<xref ref-type="bibr" rid="ref44">44</xref>], “a sentence is a set of words that in principle tells a complete thought.” Thus, unless a sentence is highly complex, we can assume that the segmentation of a document into sentences is the easiest way to automatically extract single statements. To be precise, a single sentence may contain several statements. We have also observed that expert annotators tend to focus on statements rather than entire sentences when labeling data. However, we do not have a robust method of statement demarcation. In addition, most sentences contain a single main statement; thus, we decided to make the sentence the atomic unit of annotation and classification.</p>
        <p>An additional reason for focusing on single sentences is the phenomenon of shrinking attention. Recent studies suggest that, over recent decades, collective attention spans are becoming shorter across all domains of culture, including the web [<xref ref-type="bibr" rid="ref45">45</xref>]. It is debatable as to what the underlying cause of this phenomenon is. The most likely explanations suggest the impact of the rapid acceleration in the rate of production and consumption of information. Given finite attention resources, this inevitably leads to more cursory interaction with information. It is possible that this phenomenon also affects the consumption of health-related information, which only exacerbates the problem of the ubiquitousness of medical fake news on the web.</p>
      </sec>
      <sec>
        <title>Expert Annotators</title>
        <p>In all, 9 medical professionals took part in the experiment: 2 cardiologists, 1 gynecologist, 3 psychiatrists, and 3 pediatricians. All the experts had completed 6 years of medical studies, followed by a 5-year specialization program that culminated in a specialization examination. The experts were paid for a full day of work (approximately 8 hours each). Of the 9 experts, 8 (89%) had at least 10 years of clinical experience. The gynecologist was a resident physician; we accepted his participation in the experiment because of his status as a PhD candidate in medicine. Of the 3 psychiatrists, 1 (33%) held a PhD degree in medical sciences. The experts were allowed to browse certified medical information databases throughout the experiment. Each expert evaluated the credibility of content within their specialization (cardiology, gynecology, psychiatry, or pediatrics).</p>
      </sec>
      <sec>
        <title>Annotation Protocol</title>
        <p>Our goal is to create a rich and diverse corpus of medical sentences assessed and labeled in terms of their credibility by medical experts. To obtain reliable and comparable credibility evaluations, the experts participating in our study were supported by a detailed annotation protocol.</p>
        <p>The medical experts evaluated the credibility of sentences with the following set of labels and the corresponding instruction:</p>
        <list list-type="bullet">
          <list-item>
            <p>CRED (credible): the sentence is reliable; does not raise major objections; contains verifiable information from the medical domain</p>
          </list-item>
          <list-item>
            <p>NONCRED (not credible): the sentence contains false or unverifiable information; contains persuasion contrary to current medical recommendations; contains outdated information</p>
          </list-item>
          <list-item>
            <p>NEU (neutral): the sentence does not contain factual information (eg, it is a question); is not related to medicine</p>
          </list-item>
        </list>
        <p>The experts were asked to base their answers mostly on their experience, knowledge, and intuition, but they were also allowed to use an external database that they would usually use in the course of their medical practice. The main direction provided to the experts was to focus on the patient’s alleged perception of the information. The control question was stated as follows: “If the patient asked you if he or she should trust this statement, would you say yes or no?”</p>
        <p>In addition, we collected the following information for each sentence:</p>
        <list list-type="bullet">
          <list-item>
            <p>Time needed for evaluation (in milliseconds)</p>
          </list-item>
          <list-item>
            <p>(Optional) Reason for evaluating the sentence as noncredible</p>
          </list-item>
          <list-item>
            <p>Number of surrounding sentences needed to understand the context of the sentence being evaluated</p>
          </list-item>
        </list>
        <p>Examples of credible sentences from the <italic>cholesterol and statins</italic> topic include the following:</p>
        <disp-quote>
          <p>Lp(a), the worst cholesterol, is a number most doctors don’t measure.</p>
        </disp-quote>
        <disp-quote>
          <p>Monitoring cholesterol levels is crucial because individuals with unhealthy cholesterol levels typically do not develop specific symptoms.</p>
        </disp-quote>
        <disp-quote>
          <p>Non-communicable chronic disease is now the biggest killer on the planet.</p>
        </disp-quote>
        <p>Examples of noncredible sentences include the following:</p>
        <disp-quote>
          <p>For the remaining 90% of the population, the total cholesterol had no predictive value.</p>
        </disp-quote>
        <disp-quote>
          <p>It seems likely that fear of fat is unreal, based on a carry-on of the cholesterol fear.</p>
        </disp-quote>
        <disp-quote>
          <p>Most people don’t need to cut down on the cholesterol that’s found in these foods.</p>
        </disp-quote>
        <p>Examples of neutral sentences include the following:</p>
        <disp-quote>
          <p>Seven [research items] found no link between LDL cholesterol and cardiovascular mortality.</p>
        </disp-quote>
        <disp-quote>
          <p>These perspectives won’t make headlines and they won’t appeal to those who want a simple and definite answers.</p>
        </disp-quote>
        <disp-quote>
          <p>This is not why I went to medical school.</p>
        </disp-quote>
      </sec>
      <sec>
        <title>Impact of Sentence Context on Credibility Evaluation</title>
        <p><xref ref-type="table" rid="table1">Table 1</xref> shows how many sentences required <italic>m</italic> additional surrounding sentences to provide the context for annotation. When focusing on noncredible statements, 71.27% (1377/1932) of the sentences were self-explanatory, 26.6% (514/1932) of the sentences required a single sentence of context, and only 2.17% (42/1932) of the sentences required 2 or more sentences of context. Thus, we conclude that our choice of the sentence as the unit of information is justified.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Number of surrounding sentences (<italic>m</italic>) needed to understand the context and evaluate the credibility of a sentence for all data, only credible subset, only noncredible subset, and only neutral subset (n=10,649).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="50"/>
            <col width="240"/>
            <col width="240"/>
            <col width="240"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>
                  <italic>m</italic>
                </td>
                <td>All data, n (%)</td>
                <td>Credible subset, n (%)</td>
                <td>Noncredible subset, n (%)</td>
                <td>Neutral subset, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td>8565 (80.43)</td>
                <td>4955 (80.07)</td>
                <td>1377 (71.27)</td>
                <td>2233 (88.3)</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>1958 (18.39)</td>
                <td>1165 (18.83)</td>
                <td>514 (26.6)</td>
                <td>279 (11.03)</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>107 (1)</td>
                <td>57 (0.92)</td>
                <td>34 (1.76)</td>
                <td>16 (0.63)</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>12 (0.11)</td>
                <td>5 (0.08)</td>
                <td>6 (0.31)</td>
                <td>1 (0.04)</td>
              </tr>
              <tr valign="top">
                <td>&#62;3</td>
                <td>8 (0.07)</td>
                <td>6 (0.1)</td>
                <td>2 (0.05)</td>
                <td>0 (0)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>For the annotation process, we used the software developed specifically for this experiment. During the experiment, the medical expert could not see the context of the whole document while annotating a sentence. However, we provided the most relevant keywords collected from the rest of the document. Keywords were extracted using the methods described in the study by Nabożny et al [<xref ref-type="bibr" rid="ref46">46</xref>]. A single task is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Annotation interface: single sentence view.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e26065_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>If the medical expert decided that a sentence could not be assessed because of insufficient context (despite visible keywords), they could display the preceding and succeeding sentences in the annotation view, as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. Each medical expert was asked to annotate approximately 1000 randomly chosen sentences. Whenever the medical expert labeled a sentence as noncredible, they were asked to provide the reason for their decision. To avoid the effect of intentionally skipping the NONCRED label to complete the task quicker, providing the reason was optional, and the expert could also choose an explanation from a set of tags prepared beforehand.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Annotation interface: sentence in context view.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e26065_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The set of possible explanations prepared in advance included the following:</p>
        <list list-type="bullet">
          <list-item>
            <p>The sentence contains argumentation that is weak or irrelevant, given the context of the subject being discussed.</p>
          </list-item>
          <list-item>
            <p>The sentence contains an encouragement to act inconsistently with current medical knowledge.</p>
          </list-item>
          <list-item>
            <p>The author of this sentence shows signs of the lack of substantive knowledge or is not objective.</p>
          </list-item>
          <list-item>
            <p>The sentence is an anecdote or a rumor.</p>
          </list-item>
          <list-item>
            <p>The sentence is an advertisement of an unproven drug or substance or an unproven therapy.</p>
          </list-item>
          <list-item>
            <p>The sentence cites research that was conducted on a small sample.</p>
          </list-item>
          <list-item>
            <p>The sentence contains invalid numerical data.</p>
          </list-item>
          <list-item>
            <p>The sentence contains outdated information.</p>
          </list-item>
          <list-item>
            <p>The sentence is incomprehensible or grammatically incorrect.</p>
          </list-item>
        </list>
        <p>Most of the annotation was conducted in controlled laboratory conditions. The experts were performing annotation tasks in the presence of a supervisor who was conducting the experiment. At any time, the medical experts had access to the detailed instruction (definitions of each label) and could also ask the supervisor for assistance. The experts completed 70% of the tasks in controlled conditions, and the rest were completed with web-based assistance within a few days after the conclusion of the laboratory experiment.</p>
      </sec>
      <sec>
        <title>Sentence Processing Pipeline Using Clustering and Reranking</title>
        <p>Inspired by the active learning paradigm, we designed an assessment loop for medical sentence credibility. The core idea of the active annotation approach is to augment annotation efforts by 2 mechanisms:</p>
        <list list-type="bullet">
          <list-item>
            <p><italic>Clustering</italic>:</p>
            <p>Semantically similar sentences are automatically grouped into clusters. The process of clustering uses sentence-embedding representation. Each sentence is represented as a vector computed by the language model. As each sentence is a vector, mathematical measures of a distance can be used, such as the Euclidean distance or the cosine distance. We use the k-means algorithm to divide sentences into clusters. K-means is a simple iterative procedure where clustered items (in our case, vectors representing sentences) are assigned to the closest of k points representing cluster centers (also known as centroids). After assigning each item to the nearest centroid, the positions of the centroids are updated to reflect the arithmetic mean of assigned items. Finally, items are reassigned to the nearest centroid, and the procedure is repeated until no more reassignments are possible. The resulting clustering maximizes the similarity among the items assigned to a cluster and at the same time minimizes the similarity among the items assigned to different clusters. In other words, if 2 sentences are assigned to the same cluster, the distance between their vector representations is small, which in turn means that the sentences are semantically similar (because semantic similarity is the criterion of embedding vector training). When human annotators are presented with sentences from a cluster, they process sentences that share a common topic. This reduces the cognitive workload of human annotators because they do not have to switch contexts between annotated sentences.</p>
          </list-item>
          <list-item>
            <p><italic>Reranking</italic>:</p>
            <p>Noncredible statements are moved to the top of the ranking. Human annotators are required to identify noncredible statements; thus, every time human annotators are presented with a credible or neutral sentence, they may consider it to be a waste of their precious time. By combining sentence embeddings and clustering, we push sentences that are close to the already labeled noncredible sentences to the top of the ranking, prioritizing these sentences for the next round of manual annotation.</p>
          </list-item>
        </list>
        <p>In the active annotation process, the following steps are performed in the assessment loop:</p>
        <list list-type="order">
          <list-item>
            <p>Sentences from the corpus are encoded by the language model to produce sentence embeddings.</p>
          </list-item>
          <list-item>
            <p>The k-means clustering algorithm [<xref ref-type="bibr" rid="ref47">47</xref>] is applied, and the top <italic>m</italic> sentences nearest to the cluster center are chosen for initial human annotation. We use the elbow method [<xref ref-type="bibr" rid="ref48">48</xref>] to find the number of clusters (which represents the number of distinct topics in the corpus).</p>
          </list-item>
          <list-item>
            <p>Medical experts annotate selected sentences.</p>
          </list-item>
          <list-item>
            <p>The algorithm reranks all sentences based on the distribution of labels within clusters.</p>
          </list-item>
          <list-item>
            <p>Medical experts annotate sentences from the top of the ranking, triggering another reranking procedure.</p>
          </list-item>
        </list>
        <p>The general idea behind reranking is presented in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Sentence reranking: general idea.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e26065_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Step 4 is crucial to the method. First, we find clusters with a large proportion of labeled noncredible statements. During initial iterations of the method, only a small fraction of sentences are manually labeled, but the clustering step groups semantically similar sentences; therefore, we expect that many sentences belonging to a cluster with predominantly noncredible labels also would turn out to be noncredible. In step 5, more sentences are manually labeled, providing a better approximation of the true distribution of labels within clusters. By repeating steps 4 and 5, we annotate more and more sentences, prioritizing the annotation of noncredible sentences.</p>
        <p>For sentence embeddings computations, we use the sBERT modification Robustly Optimized BERT Pretraining Approach where embeddings are calculated based on the same model as BERT but with slightly different training objectives and hyperparameters [<xref ref-type="bibr" rid="ref49">49</xref>]. We also use a simple preprocessing technique where we subtract the mean and exclude the first principal component from each embedding vector [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>] (principal component analysis transformation). The assumption behind this step is that the first principal component encodes syntactic rules of the grammar of the sentences without contributing to their semantics. The removal of the first component strips sentence vectors of grammar and leaves only the part of the vector where the meaning is encoded.</p>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref> presents the overview of the sentence processing pipeline.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Processing pipeline. PCA: principal component analysis; RoBERTa: Robustly Optimized Bidirectional Encoder Representations from Transformers Pretraining Approach.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e26065_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The key component of the pipeline is the clustering and reranking strategy. For reranking, we perform 2-level sorting. The first sorting is applied to clusters, and the second sorting reorders sentences within clusters. We rank clusters based on the proportions of credible, noncredible, and neutral labels in the top <italic>m</italic> most central sentences. Our scoring formula penalizes clusters with a significant proportion of credible sentences. At the same time, it rewards clusters with a significant proportion of noncredible sentences. This strategy enables us to push most of the noncredible sentences to the top of the ranking, thus positioning them at the top of the queue for medical expert evaluation.</p>
        <p>Let p(c), p(n), and p(u) denote the probability that a random sentence is credible, noncredible, or neutral, respectively. This probability is computed by manually annotating <italic>m</italic> most central sentences in the cluster. The cluster score is defined as follows:</p>
        <disp-quote>
          <p>score@k = 1/(1 + e<sup>–(p(n)–p(c))</sup>) + 1/w<sup>p(u)+1</sup> <bold>(1)</bold></p>
        </disp-quote>
        <p>The first component of the formula is the sigmoid function with the difference between p(n) and p(c) as the argument. If the difference is positive, which means that there is an advantage of noncredible proportion over credible, the sigmoid function gives results close to 1 (the bigger the difference, the closer to 1). If the difference is negative, the sigmoid value tends toward zero. The second component of the formula is the parametrizable function, which enables giving proper scoring weight to p(u). For example, given w=1.5, it orders clusters with p(n)=0.4 and p(c)=0.3 below clusters with p(n)=0.5 and p(c)=0.4. Without the second component, both clusters would receive the same score.</p>
        <p>The intracluster ranking of sentences is performed based on the distance of sentences from the center of the cluster, with more central sentences placed at the top of the ranking. The distance is measured as the cosine distance in the latent embedding space. The final ranking of all sentences is obtained by first ordering all clusters in the decreasing order of score@k and, next, by reordering sentences within each cluster by the growing distance from the center of the cluster.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>We used the method described in the previous section to create an annotated data set. We now describe the results. First, we present the data set statistics. Next, we depict the effect of our sentence pipelining method on the effectiveness of the medical experts’ time allocation. Subsequently, we conduct a qualitative analysis of the credible and noncredible sentences, focusing on a single topic.</p>
      </sec>
      <sec>
        <title>Distribution of Labels Within the Data Set</title>
        <p>The distribution of labels (CRED, NONCRED, and NEU) for each topic is shown in <xref rid="figure5" ref-type="fig">Figure 5</xref>. Distribution varies for each topic but within a certain range. For example, the CRED label is always at least two times more frequent than the NONCRED label and significantly more frequent than the NEU label. The NEU label applies to no more than 30% (3195/10,649) of the sentences in all topics, which leads us to the conclusion that, regardless of the topic, more than 59.99% (6389/10,649) of the statements warrant credibility checking.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Distribution of credible, noncredible, and neutral sentence labels within topics. CS: cesarean section; CRED: credible; NB: natural birth; NEU: neutral; NONCRED: noncredible; SSRI: selective serotonin reuptake inhibitor.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e26065_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Although the articles were explicitly picked so that they reflect potentially controversial topics, the proportion of noncredible sentences was generally small. Taking into account the alarm-raising calls of the medical experts, we can conclude that even a small contribution of noncredible content throughout the web has a substantial influence on the formation of people’s views.</p>
      </sec>
      <sec>
        <title>Justification for Using the Lift Measure</title>
        <p>We have chosen the lift measure to evaluate the effectiveness of our method. Throughout the qualitative analysis, it became apparent that semantic similarity measures retrieved from neural language models lose important information encoded in annotations. Our objective is to optimize medical experts’ time by focusing their attention on statements that are possibly noncredible. Using the lift measure, we determined the relative time savings by indicating how many more noncredible sentences a medical expert would see by reviewing a given percentage of the entire sentence corpus using our ranking. The lift measure specified for each ranking percentile is defined as follows:</p>
        <p><italic>lift@p</italic> = N/p × <italic>recall@p</italic> <bold>(2)</bold></p>
        <p>where <italic>p</italic> is the percentile, <italic>N</italic> is the total number of sentences in the corpus, and <italic>recall@p</italic> defines, for a given percentile <italic>p</italic> of the ranking, how many noncredible statements have been included in the <italic>p</italic>th percentile of the ranking.</p>
        <p>The key parameter of our method is <italic>m</italic>, the number of top sentences in a cluster for manual annotation. We tested our method on a full data set (all topics merged) for 3 <italic>m</italic> values<italic>,</italic> each of which is listed in <xref ref-type="table" rid="table2">Table 2</xref>. In <xref ref-type="table" rid="table3">Table 3</xref>, we present the lift results for the separate topic of <italic>cholesterol and statins</italic>. The baseline value for lift is 1. Thus, we can interpret the results as follows: the number by which a given value exceeds 1 tells us how many more noncredible sentences medical experts would discover at a given corpus percentile when using the reranking procedure. For example, when reviewing 20% of the full corpus, medical experts would discover 29% more noncredible sentences if the batch were to be reranked using the <italic>m</italic> value of 5 than without applying the procedure.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Lift results for the full data set. <italic>m</italic> is the number of top sentences from each cluster to be manually reviewed.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="160"/>
            <col width="310"/>
            <col width="310"/>
            <col width="60"/>
            <col width="60"/>
            <thead>
              <tr valign="top">
                <td>lift@<italic>m</italic></td>
                <td>Number of clusters</td>
                <td colspan="4">Batch percentile</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>1% (approximately 100 sentences)</td>
                <td>10% (approximately 1000 sentences)</td>
                <td>20%</td>
                <td>40%</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>lift@5</td>
                <td>200</td>
                <td>1.36</td>
                <td>
                  <italic>1.36</italic>
                  <sup>a</sup>
                </td>
                <td>
                  <italic>1.29</italic>
                </td>
                <td>
                  <italic>1.17</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>lift@10</td>
                <td>130</td>
                <td>1.23</td>
                <td>1.31</td>
                <td>1.3</td>
                <td>
                  <italic>1.17</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>lift@15</td>
                <td>100</td>
                <td>
                  <italic>1.49</italic>
                </td>
                <td>1.27</td>
                <td>1.22</td>
                <td>1.16</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>The best performing set of parameters for a given batch percentile is italicized.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Lift results for the cholesterol and statins topic. <italic>m</italic> is the number of top sentences from each cluster to be manually reviewed.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="160"/>
            <col width="310"/>
            <col width="310"/>
            <col width="60"/>
            <col width="60"/>
            <thead>
              <tr valign="top">
                <td>lift@<italic>m</italic></td>
                <td>Number of clusters</td>
                <td colspan="4">Batch percentile</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>1% (approximately 20 sentences)</td>
                <td>10% (approximately 200 sentences)</td>
                <td>20%</td>
                <td>40%</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>lift@5</td>
                <td>40</td>
                <td>1.75</td>
                <td>1.24</td>
                <td>1.26</td>
                <td>1.27</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>The number of clusters for each experiment is chosen based on 2 criteria: the elbow method [<xref ref-type="bibr" rid="ref48">48</xref>] and the proportion of sentences to be manually reviewed. The latter should not exceed 15% of the batch. Let us take <xref ref-type="table" rid="table3">Table 3</xref> as an example: we delegate the top 5 sentences from each of the 40 clusters (5 × 40 = 200 sentences) to be manually reviewed by the experts. These 200 sentences out of the approximately 2000 sentences in the <italic>cholesterol and statins</italic> topical category make up 10% of the set. This means that by gathering initial labels from only 10% of the sentences from the topical corpus, we can obtain significant (eg, 27% in the 40th percentile) savings of experts’ time during text annotation sessions.</p>
      </sec>
      <sec>
        <title>Zooming in on a Topical Cluster: Case Study of Statins</title>
        <p>We conducted a case study in the subdomain of cholesterol and statins. We did this to gain insight into the process of credibility evaluation and the nature of noncredible medical sentences. The focus on a single topic was dictated by the size and diversity of our data set. Presenting an in-depth qualitative analysis of the entire data set would take too much space. The following is a qualitative analysis of all sentences labeled noncredible by the experts in the selected topic.</p>
      </sec>
      <sec>
        <title>Brief Introduction to the Topic of Statin Use</title>
        <p>Numerous epidemiological studies, Mendelian randomization studies, and randomized controlled trials have consistently demonstrated a relationship between the absolute changes in plasma low-density lipoprotein (LDL) and the risk of atheromatous CVD. The inverse association between plasma high-density lipoprotein and the risk of CVD is among the most consistent and reproducible associations in observational epidemiology. Higher plasma Lp(a) concentrations are associated with an increased risk of CVD, but it appears to be a much weaker risk factor for most people than LDL cholesterol [<xref ref-type="bibr" rid="ref52">52</xref>]. Commonly, plasma cholesterol is used to calculate cardiovascular risk, whereas LDL is used to evaluate the achievement of target values according to the estimated cardiovascular risk.</p>
        <p>Hypercholesterolemia (dyslipidemia with increased levels of circulating cholesterol) is not the only factor responsible for the development of CVD; obesity, poor diet, lack of physical activity, smoking, and high blood pressure (hypertension) also contribute. To prevent CVD, physicians recommend that patients quit smoking; eat a diet in which approximately 30% of the calories come from fat, choosing polyunsaturated fats and avoiding saturated fats and trans fats; reduce high blood pressure; increase physical activity; and maintain their weight within normal limits [<xref ref-type="bibr" rid="ref53">53</xref>].</p>
        <p>Hydroxymethylglutaryl-coenzyme A reductase inhibitors (statins) lower cholesterol synthesis. Statins represent the cornerstone for the treatment of hypercholesterolemia and in the prevention of CVD, although muscle-related side effects have strongly limited patients’ adherence and compliance [<xref ref-type="bibr" rid="ref53">53</xref>]. The evidence in support of muscle pain caused by statins is in some cases equivocal and not particularly strong. The reported symptoms are difficult to quantify and rarely is it possible to establish a causal link between statins and muscle pain. In randomized controlled trials, statins have been well tolerated, and muscle pain–related side effects were similar to those caused by placebo. An exchange of statins may be beneficial, although all statins have been associated with muscle pain. In some patients, a reduction of dose is worth trying, especially in primary prevention [<xref ref-type="bibr" rid="ref54">54</xref>]. Statins have been linked also to digestive problems, mental fuzziness, and glucose metabolism, and they may rarely cause liver damage. The influence of the diabetogenic action of statins is still unclear. Despite these observations, the CVD preventive benefit of statin treatment outweighs the CVD risk associated with the development of new diabetes [<xref ref-type="bibr" rid="ref55">55</xref>]. There is good evidence that statins given late in life to people at risk for vascular disease do not prevent cognitive decline or dementia [<xref ref-type="bibr" rid="ref56">56</xref>]. Statins can cause transient elevation of liver enzymes, which has led to the unnecessary cessation of these substances prematurely [<xref ref-type="bibr" rid="ref57">57</xref>]. Coenzyme Q10 (CoQ10) is widely used as a dietary supplement, and one of its roles is to act as an antioxidant. Decreased levels have been shown in diseased myocardium and in Parkinson disease. 
Farnesyl pyrophosphate is a critical intermediate for CoQ10 synthesis, and blockage of this mechanism may be important in statin myopathy. Supplementation with CoQ10 has been reported to be beneficial in treating hypertension, statin myopathy, heart failure, and problems associated with chemotherapy; however, this use of CoQ10 as a supplement has not been confirmed in randomized controlled clinical trials [<xref ref-type="bibr" rid="ref58">58</xref>].</p>
        <p>In conclusion, recent analyses and randomized controlled trials have been published confirming that the cardiovascular benefits of statin therapy in patients for whom it is recommended by current guidelines greatly outweigh the risks of side effects [<xref ref-type="bibr" rid="ref59">59</xref>]. The Cholesterol Treatment Trialists Collaboration meta-analysis showed that for each 1 mmol/L reduction in LDL, major vascular events (myocardial infarction, coronary artery disease death, or any stroke or coronary revascularization) were reduced by 22% and total mortality was reduced by 10% over 5 years [<xref ref-type="bibr" rid="ref59">59</xref>].</p>
      </sec>
      <sec>
        <title>Extracting Categories From Raw Data</title>
        <p>Our data set contains 1986 unique sentences about cholesterol and statins. Of the 1986 sentences, 1041 (52.42%) were labeled by medical experts as credible, 551 (27.74%) as neutral, and 394 (19.84%) as noncredible. We have reviewed the compliance of the assessments in the noncredible class with the annotation protocol. As a result, of the 394 noncredible annotations, 72 (18.3%) were discarded as noncompliant. The following are some examples of sentences erroneously annotated as noncredible:</p>
        <disp-quote>
          <p>“Why are they putting patient lives at risk?” Sentence is a question and should be labeled as neutral.</p>
        </disp-quote>
        <disp-quote>
          <p>“Researchers chose 30 studies in total to analyze.” Sentence does not contain any medical terms and should be labeled as neutral.</p>
        </disp-quote>
        <disp-quote>
          <p>“They [statins] work by blocking an enzyme called HMG-CoA reductase, which makes your body much slower at synthesizing cholesterol.” Sentence contains factually true statement and should be labeled as credible.</p>
        </disp-quote>
        <p>Finally, of the 1986 sentences, we identified 322 (16.21%) as noncredible. We extracted 18 claim categories, which represented 61.5% (198/322) of all noncredible sentences. The process of claim category extraction involved the following steps:</p>
        <list list-type="order">
          <list-item>
            <p>The annotator examined all the sentences from the noncredible class one by one.</p>
          </list-item>
          <list-item>
            <p>If a sentence matched an already existing category, it was assigned to that category; otherwise, a new category was created.</p>
          </list-item>
          <list-item>
            <p>After processing all the sentences, categories with only 1 sentence were merged into a Miscellaneous category that contained the remaining 29.5% (95/322) of the noncredible sentences.</p>
          </list-item>
        </list>
        <p>We also compared the compliance of the extracted claim categories with current medical guidelines and knowledge. The category counts are presented in <xref ref-type="table" rid="table4">Table 4</xref>, and these categories are listed and explained in <xref ref-type="table" rid="table5">Table 5</xref>.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The number of occurrences of a particular claim category within the <italic>cholesterol</italic> and <italic>statins</italic> subset of sentences.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="320"/>
            <col width="200"/>
            <col width="250"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Claim category</td>
                <td>Number of occurrences</td>
                <td>Is related claim factually incorrect?</td>
                <td>Is category based on the content or on the form?</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Miscellaneous</td>
                <td>95</td>
                <td>N/A<sup>a</sup></td>
                <td>Form</td>
              </tr>
              <tr valign="top">
                <td>(stat) Side effects</td>
                <td>43</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>(chol) Not an indicator of CVD<sup>b</sup> risk</td>
                <td>25</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>Diet as good as drugs</td>
                <td>22</td>
                <td>Yes</td>
                <td>Form</td>
              </tr>
              <tr valign="top">
                <td>(chol) Too low is harmful</td>
                <td>18</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>Lifestyle changes are enough</td>
                <td>15</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>Big pharma</td>
                <td>14</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>Inflammation theory</td>
                <td>14</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>(stat) Cause diabetes</td>
                <td>13</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>(stat) Not needed</td>
                <td>10</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>(chol) Makes cells and protects nerves</td>
                <td>8</td>
                <td>No</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>(stat) Not effective</td>
                <td>7</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>(stat) Prescription based solely on (chol) level</td>
                <td>7</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>Detailed data</td>
                <td>7</td>
                <td>N/A</td>
                <td>Form</td>
              </tr>
              <tr valign="top">
                <td>(stat) Cause cognitive impairment</td>
                <td>6</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>(stat) Not studied enough</td>
                <td>6</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>High HDL<sup>c</sup> neutralizes high LDL<sup>d</sup></td>
                <td>6</td>
                <td>No</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>Harmful CoQ10<sup>e</sup> loss</td>
                <td>4</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>(chol) Consumption not an issue</td>
                <td>3</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>Lifestyle versus statins</td>
                <td>2</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
              <tr valign="top">
                <td>No liver function monitoring</td>
                <td>2</td>
                <td>Yes</td>
                <td>Content</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>CVD: cardiovascular disease.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>HDL: high-density lipoprotein.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>LDL: low-density lipoprotein.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>CoQ10: Coenzyme Q10.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Claim categories and their explanations, extracted manually from all noncredible sentences from the <italic>cholesterol</italic> and <italic>statins</italic> topic.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="700"/>
            <thead>
              <tr valign="top">
                <td>Claim category</td>
                <td>Claim explanation</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>(stat) Side effects</td>
                <td>Statins’ side effects outweigh the benefits</td>
              </tr>
              <tr valign="top">
                <td>(chol) Not an indicator of CVD<sup>a</sup> risk</td>
                <td>Total cholesterol is not an indicator of CVD</td>
              </tr>
              <tr valign="top">
                <td>Diet as good as drugs</td>
                <td>Aggregation of different dietary interventions to lower cholesterol, triglycerides, or sugars</td>
              </tr>
              <tr valign="top">
                <td>(chol) Too low is harmful</td>
                <td>Too low cholesterol level is harmful</td>
              </tr>
              <tr valign="top">
                <td>Lifestyle changes are enough</td>
                <td>People can lower cholesterol level just by developing good habits and eating a proper diet</td>
              </tr>
              <tr valign="top">
                <td>Big pharma</td>
                <td>People (eg, physicians and pharmaceutical company workers) make considerable profit through prescribing statins</td>
              </tr>
              <tr valign="top">
                <td>Inflammation theory</td>
                <td>It is inflammation that causes CVD, not excessive cholesterol level; cholesterol is an effect, not a cause</td>
              </tr>
              <tr valign="top">
                <td>(stat) Cause diabetes</td>
                <td>Statins increase the risk of diabetes</td>
              </tr>
              <tr valign="top">
                <td>(stat) Not needed</td>
                <td>Statins are given to healthy people who do not need them</td>
              </tr>
              <tr valign="top">
                <td>(chol) Makes cells and protects nerves</td>
                <td>Cholesterol produces hormones that make body cells and protect nerves</td>
              </tr>
              <tr valign="top">
                <td>(stat) Not effective</td>
                <td>Statins do not fulfill their role in reducing the risk of CVD</td>
              </tr>
              <tr valign="top">
                <td>(stat) Prescription based solely on (chol) level</td>
                <td>Statin prescription is based solely on total cholesterol level</td>
              </tr>
              <tr valign="top">
                <td>Detailed data</td>
                <td>Sentences contain detailed data, for example, “LDL<sup>b</sup> cholesterol level should not exceed 200 mg/dL”</td>
              </tr>
              <tr valign="top">
                <td>(stat) Cause cognitive impairment</td>
                <td>Statin consumption causes different forms of cognitive impairment (including memory loss and slow information processing)</td>
              </tr>
              <tr valign="top">
                <td>(stat) Not studied enough</td>
                <td>Statins’ effectiveness is not studied enough</td>
              </tr>
              <tr valign="top">
                <td>High HDL<sup>c</sup> neutralizes high LDL</td>
                <td>HDL is a so-called good cholesterol, whereas LDL is a so-called bad cholesterol; high levels of the former neutralize negative consequences of high levels of the latter</td>
              </tr>
              <tr valign="top">
                <td>Harmful CoQ10<sup>d</sup> loss</td>
                <td>Statin-related CoQ10 loss is harmful</td>
              </tr>
              <tr valign="top">
                <td>(chol) Consumption not an issue</td>
                <td>People should not worry about cholesterol consumption</td>
              </tr>
              <tr valign="top">
                <td>Lifestyle versus statins</td>
                <td>Lifestyle changes are more effective ways to prevent CVDs than statin consumption</td>
              </tr>
              <tr valign="top">
                <td>No liver function monitoring</td>
                <td>Monitoring of liver function tests is no longer recommended in patients on statin therapy</td>
              </tr>
              <tr valign="top">
                <td>Miscellaneous</td>
                <td>None of the above</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>CVD: cardiovascular disease.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>LDL: low-density lipoprotein.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>HDL: high-density lipoprotein.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>CoQ10: Coenzyme Q10.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Of the 322 noncredible sentences, 198 (61.5%) fall into specific claim categories. Most of the categories have at least 6 examples that spread across different documents. We have designated categories with only 2 or 3 occurrences as separate because the entire noncredible class is relatively small and finding even a few similar sentences may indicate that the claim is being duplicated on the web.</p>
        <p>Of the 95 sentences that did not fall into any claim category, we identified 9 (9%) that bear the hallmarks of a conspiracy theory, 7 (7%) containing reasoning based on anecdotal evidence, and 9 (9%) containing misleading statistical reporting:</p>
        <list list-type="bullet">
          <list-item>
            <p>Conspiracy theory (referring to groups of interests such as prostatin vs antistatin researchers): “Ironically, prostatin researchers themselves are the ones who are guilty of cherry-picking.”</p>
          </list-item>
          <list-item>
            <p>Anecdotal evidence: “What’s worse, my doctor has never asked if I smoke cigarettes, exercise regularly, or eat a healthy diet.”</p>
          </list-item>
          <list-item>
            <p>Misleading statistical reporting: “OK, maybe the benefits of taking a statin are small, but many smart doctors say a reduction of five-tenths or six-tenths of 1% is worthwhile.”</p>
          </list-item>
        </list>
        <p>As part of qualitative analysis, we compared 2 sets of clusters: automatically created versus manually created. We were able to select sentences that contain similar words and statements but differ in the narrative details that skewed the experts’ judgments. We have identified 4 types of false and misleading narratives that occur frequently in the noncredible class. These narratives are as follows:</p>
        <p>1. Slippery slope: The sentence is factually true, but the consequences of the presented fact are exaggerated. Examples:</p>
        <disp-quote>
          <p>Hence, while the drug might synergise with a statin to prevent a non-fatal (or minor) heart attack, it seems to increase the risk of some other equally life-threatening pathology, resulting in death.</p>
        </disp-quote>
        <disp-quote>
          <p>Cholesterol also helps in the formation of your memories and is vital for neurological function.</p>
        </disp-quote>
        <p>2. Hedging: The sentence is factually incorrect, but there is a part of it that softens the overtone of the presented statement. Examples:</p>
        <disp-quote>
          <p>However, cholesterol content should be less of a concern than fat content.</p>
          <attrib>CRED</attrib>
        </disp-quote>
        <disp-quote>
          <p>Coenzyme Q10 supplements may help prevent statin side effects in some people, though more studies are needed to determine any benefits of taking it.</p>
          <attrib>CRED</attrib>
        </disp-quote>
        <disp-quote>
          <p>The FDA warns on statin labels that some people have developed memory loss or confusion while taking statins.</p>
          <attrib>CRED</attrib>
        </disp-quote>
        <p>3. Suggested negative consequences: The sentence is mostly factually true, but given the context of the expert’s experience, there is a risk that the presented information may lead the patient to act contrary to current medical guidelines. Examples:</p>
        <disp-quote>
          <p>For starters, statin drugs deplete your body of coenzyme Q10 (CoQ10), which is beneficial to heart health and muscle function.</p>
        </disp-quote>
        <disp-quote>
          <p>Cholesterol is a waxy, fatty steroid that your body needs for things like: cell production.</p>
        </disp-quote>
        <p>4. Twisting words: the presence of a single word changes the overtone of the sentence. Examples:</p>
        <disp-quote>
          <p>Statins may slightly increase the risk for Type 2 diabetes, a condition that can lead to heart disease or stroke.</p>
          <attrib>CRED</attrib>
        </disp-quote>
        <disp-quote>
          <p>For example, it may be enough to eat a nutritious diet, exercise regularly, and avoid smoking tobacco products.</p>
          <attrib>NONCRED</attrib>
        </disp-quote>
        <p>versus</p>
        <disp-quote>
          <p>Eating a healthy diet and doing regular exercise can help lower the level of cholesterol in your blood.</p>
          <attrib>CRED</attrib>
        </disp-quote>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The results of our experiments show that applying the active annotation paradigm for credibility assessment in the medical domain produces measurable gains in terms of the use of medical experts’ time. Active annotation allows us to raise the number of noncredible statements annotated by medical experts by 30% on average, within a fixed time and monetary budget. Annotation of medical information cannot be crowdsourced because it requires the deep and broad domain knowledge of medical experts and their time is expensive. We regard the problem of prohibitively expensive annotation costs as the main obstacle to the broad use of machine learning models in the evaluation of the credibility of web-based medical resources. Our proposal is a step toward a significant lowering of these costs.</p>
        <p>However, there is still room for improvement. Our qualitative analysis shows that most of the noncredible sentences can be classified into a limited number of categories. The subset of approximately 200 noncredible sentences from the <italic>cholesterol and statins</italic> subdomain can be divided into 18 categories, each representing approximately one false statement. These 18 categories account for 61.5% (198/322) of all sentences labeled noncredible in full accordance with the annotation protocol. This indicates the importance of precise semantic clustering. More accurate clustering helps to detect noncredible sentences faster. It also enables the tagging of clusters with topic-related labels by nonexperts for later review by medical experts and, as a result, an even more useful sentence ranking. In other words, it might be possible to use crowdsourcing to some extent during preprocessing and include an expert in the loop in the main annotation pipeline, further reducing the annotation costs.</p>
        <p>Another conclusion that we drew from the qualitative analysis concerns the precision of the semantic similarity measure based on sentence embeddings. The method captures well the overall theme of the sentence but often misses the stance of the presented claim. This error is understandable because the stance in the medical domain is often expressed through subtle sentence modifications, as listed in the <italic>Results</italic> section. Sentence embeddings also struggle with finding a good representation of the form of the sentence—whether it is a supposition, a question, or a statement. Recognition of the form of the sentence can improve the accuracy of classification of neutral sentences that do not require medical expert annotation.</p>
        <p>Finally, the qualitative analysis has revealed 4 distinct narratives present in noncredible sentences. Although our analysis was limited to the topic of cholesterol and statins, we feel that these narratives are more general in nature and may apply broadly to false medical information on other topics. If this hypothesis is confirmed, it may be possible to develop machine learning models for these narratives (eg, a model searching for instances of hedging expressions or words capable of twisting the stance of the sentence). Tagging these narratives during credibility annotation may not only increase the precision of sentence classifiers built upon such data sets, but, most importantly, also help disambiguate experts’ labeling process.</p>
      </sec>
      <sec>
        <title>Conclusions and Future Work</title>
        <p>With the web quickly becoming one of the primary sources that the general public first consults for medical information [<xref ref-type="bibr" rid="ref60">60</xref>], the ability to distinguish between credible and noncredible information is indispensable. Financial interests of the alternative medicine community, combined with the rising distrust of the medical establishment, produce voluminous corpora of medical information of questionable quality. Of note, too many people fall prey to medical misinformation because it is becoming increasingly hard to tell credible content from harmful deceit.</p>
        <p>A possible solution to the problem of medical information source credibility is external certification. In our experiments, we correlated medical experts’ labels with HON labels. The certification certainly works because only 18% (240/1333) of the sentences originating from HON-certified websites were classified by our experts as noncredible. However, obtaining the certificate is not simple, the certification process is long, and the entire framework does not scale well. This scalability problem demonstrates the bottleneck of any approach used for checking the credibility of medical content—the availability and time of medical professionals who need to be involved in the evaluation. In our work, we have taken the approach of optimizing the use of the time spent by experts on credibility evaluation of medical web content. The main goal of our future work will be the improvement and extension of this approach using active annotation and active learning methods.</p>
        <p>In contrast, an ambitious goal would be to replace medical experts’ evaluations with an automated credibility evaluation system. Such a system would use advanced natural language processing and machine classification algorithms. The results of our research demonstrate the challenges that would need to be overcome to make this possible.</p>
        <p>The computational linguistics community is currently divided into 2 opposing camps: those who attribute <italic>understanding of meaning</italic> to language models and those who do not [<xref ref-type="bibr" rid="ref61">61</xref>]. Despite the recent successes of modern language models such as Generative Pre-trained Transformer 3, the evidence seems to support a more cautious position. Indeed, a language model trained only on the form (raw text) cannot capture the true meaning of the text. The meaning, in this context, should be understood as the relationship between the linguistic form and the communicative intent of the speaker.</p>
        <p>Our case goes beyond the learning of the meaning of sentences. As we have shown in this paper, there is an additional layer of complexity introduced by the notion of credibility of a statement to a user. Many machine learning solutions focus on the identification of factual flaws when addressing misinformation. However, fact-checking is not enough in the medical information domain. Often one encounters fake news and disinformation woven around factually true statements. We have seen time and time again medical experts using contextual information when assigning labels denoting sentence credibility. Most often they would take into account the most probable course of action taken by a patient who consumes medical information. Because of these annotation mechanics, the relationship between sentence credibility and sentence truthfulness becomes ambiguous, further complicating the shape of the decision boundary between credible and noncredible medical statements.</p>
        <p>This observation leads us to an important conclusion about the design of information-processing pipelines for medical content credibility evaluation. The first step is the compilation of large, high-quality data sets for machine learning model training. The active annotation approach presented in this paper allows doubling the number of sentences annotated by medical experts per cost unit (time or monetary). This, in turn, results in larger and more comprehensive training data sets. As a side effect, active annotation produces topical clusters of sentences, which can be used in 2 ways: (1) by allowing nonexpert annotators (whose time is far less expensive) to preprocess large batches of sentences to be reviewed by medical experts and (2) by reducing the cognitive stress of expert annotators due to the removal of context switching.</p>
        <p>These 2 effects combined can further enhance the annotation process and increase the volume of annotated data. We also plan to extend the scope of the data set by covering more topics and providing more annotations.</p>
        <p>The second step toward the support of medical content credibility evaluation would be the investigation of statistical models’ efficacy for automatic classification of medical sentences as either credible or noncredible. Having an accurate classifier of medical sentence credibility, we might develop machine-assisted methods for finding consensus among human annotators, for example, by correlating human annotations with the confidence scores of the classifier. Finally, we would like to pursue active annotation in the light of 2 frameworks. Bayesian reasoning provides a set of tools for modeling individual annotators’ beliefs about annotated data. Expectation maximization, in contrast, allows finding the best approximations (or maximum a posteriori estimates) of the unknown point credibility scores from empirical data. We see several possibilities of including the active annotation step in the iterative processes of Bayesian inference or expectation maximization.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Queries used to retrieve articles.</p>
        <media xlink:href="medinform_v9i11e26065_app1.docx" xlink:title="DOCX File, 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>List of article URLs.</p>
        <media xlink:href="medinform_v9i11e26065_app2.docx" xlink:title="DOCX File, 36 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CoQ10</term>
          <def>
            <p>Coenzyme Q10</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CVD</term>
          <def>
            <p>cardiovascular disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">HON</term>
          <def>
            <p>Health on the Net</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LDL</term>
          <def>
            <p>low-density lipoprotein</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">sBERT</term>
          <def>
            <p>sentence–Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The research leading to these results has received funding from the European Economic Area Financial Mechanism 2014-2021 (project registration number: 2019/35/J/HS6/03498).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zarocostas</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>How to fight an infodemic</article-title>
          <source>Lancet</source>
          <year>2020</year>
          <month>02</month>
          <day>29</day>
          <volume>395</volume>
          <issue>10225</issue>
          <fpage>676</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32113495"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(20)30461-X</pub-id>
          <pub-id pub-id-type="medline">32113495</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(20)30461-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC7133615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>5G conspiracy theories prosper during the coronavirus pandemic</article-title>
          <source>Snopes</source>
          <access-date>2021-11-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.snopes.com/news/2020/04/09/5g-conspiracy-theories-prosper-during-the-coronavirus-pandemic">https://www.snopes.com/news/2020/04/09/5g-conspiracy-theories-prosper-during-the-coronavirus-pandemic</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jablonka</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Happle</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Grote</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Schleenvoigt</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Hampel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dopfer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Behrens</surname>
              <given-names>GM</given-names>
            </name>
          </person-group>
          <article-title>Measles, mumps, rubella, and varicella seroprevalence in refugees in Germany in 2015</article-title>
          <source>Infection</source>
          <year>2016</year>
          <month>12</month>
          <volume>44</volume>
          <issue>6</issue>
          <fpage>781</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1007/s15010-016-0926-7</pub-id>
          <pub-id pub-id-type="medline">27449329</pub-id>
          <pub-id pub-id-type="pii">10.1007/s15010-016-0926-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <article-title>Medical credibility corpus</article-title>
          <source>GitHub</source>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/alenabozny/medical_credibility_corpus">https://github.com/alenabozny/medical_credibility_corpus</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sørensen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pelikan</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Röthlin</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ganahl</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Slonska</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Doyle</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Fullam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kondilis</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Agrafiotis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Uiters</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Falcon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mensing</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tchamov</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van den Broucke</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Brand</surname>
              <given-names>H</given-names>
            </name>
            <collab>HLS-EU Consortium</collab>
          </person-group>
          <article-title>Health literacy in Europe: comparative results of the European health literacy survey (HLS-EU)</article-title>
          <source>Eur J Public Health</source>
          <year>2015</year>
          <month>12</month>
          <volume>25</volume>
          <issue>6</issue>
          <fpage>1053</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25843827"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/eurpub/ckv043</pub-id>
          <pub-id pub-id-type="medline">25843827</pub-id>
          <pub-id pub-id-type="pii">ckv043</pub-id>
          <pub-id pub-id-type="pmcid">PMC4668324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Keleher</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hagger</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Health literacy in primary health care</article-title>
          <source>Aust J Prim Health</source>
          <year>2007</year>
          <month>07</month>
          <day>15</day>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>24</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aafp.org/link_out?pmid=26176370"/>
          </comment>
          <pub-id pub-id-type="doi">10.1071/PY07020</pub-id>
          <pub-id pub-id-type="pii">d12016</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dunn</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Health literacy and the internet: a study on the readability of Australian online health information</article-title>
          <source>Aust N Z J Public Health</source>
          <year>2015</year>
          <month>08</month>
          <day>25</day>
          <volume>39</volume>
          <issue>4</issue>
          <fpage>309</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1111/1753-6405.12341</pub-id>
          <pub-id pub-id-type="medline">25716142</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Trethewey</surname>
              <given-names>SP</given-names>
            </name>
          </person-group>
          <article-title>Strategies to combat medical misinformation on social media</article-title>
          <source>Postgrad Med J</source>
          <year>2020</year>
          <month>01</month>
          <day>15</day>
          <volume>96</volume>
          <issue>1131</issue>
          <fpage>4</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pmj.bmj.com/lookup/pmidlookup?view=long&#38;pmid=31732511"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/postgradmedj-2019-137201</pub-id>
          <pub-id pub-id-type="medline">31732511</pub-id>
          <pub-id pub-id-type="pii">postgradmedj-2019-137201</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ecker</surname>
              <given-names>UK</given-names>
            </name>
            <name name-style="western">
              <surname>O'Reilly</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Reid</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>EP</given-names>
            </name>
          </person-group>
          <article-title>The effectiveness of short-format refutational fact-checks</article-title>
          <source>Br J Psychol</source>
          <year>2020</year>
          <month>02</month>
          <day>02</day>
          <volume>111</volume>
          <issue>1</issue>
          <fpage>36</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30825195"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/bjop.12383</pub-id>
          <pub-id pub-id-type="medline">30825195</pub-id>
          <pub-id pub-id-type="pmcid">PMC7004143</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nyhan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Reifler</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>When corrections fail: the persistence of political misperceptions</article-title>
          <source>Polit Behav</source>
          <year>2010</year>
          <month>03</month>
          <day>30</day>
          <volume>32</volume>
          <issue>2</issue>
          <fpage>303</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1007/s11109-010-9112-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Horne</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Gruppi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Adali</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Trustworthy misinformation mitigation with soft information nudging</article-title>
          <source>Proceedings of the 2019 First IEEE International Conference on Trust, Privacy and Security in Intelligent Systems and Applications (TPS-ISA)</source>
          <year>2019</year>
          <conf-name>2019 First IEEE International Conference on Trust, Privacy and Security in Intelligent Systems and Applications (TPS-ISA)</conf-name>
          <conf-date>Dec 12-14, 2019</conf-date>
          <conf-loc>Los Angeles, CA, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/9014346"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/tps-isa48467.2019.00039</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dhoju</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rony</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Kabir</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Differences in health news from reliable and unreliable media</article-title>
          <source>Proceedings of the WWW '19: The Web Conference</source>
          <year>2019</year>
          <conf-name>WWW '19: The Web Conference</conf-name>
          <conf-date>May 13-17, 2019</conf-date>
          <conf-loc>San Francisco, CA, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3308560.3316741</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernández-Pichel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Losada</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Pichel</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Elsweiler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Reliability prediction for health-related content: a replicability study</article-title>
          <source>Advances in Information Retrieval</source>
          <year>2021</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Afsana</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kabir</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Automatically assessing quality of online health articles</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2021</year>
          <month>02</month>
          <volume>25</volume>
          <issue>2</issue>
          <fpage>591</fpage>
          <lpage>601</lpage>
          <pub-id pub-id-type="doi">10.1109/jbhi.2020.3032479</pub-id>
          <pub-id pub-id-type="medline">33079686</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Augenstein</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lioma</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lima</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Simonsen</surname>
              <given-names>JG</given-names>
            </name>
          </person-group>
          <article-title>MultiFC: a real-world multi-domain dataset for evidence-based fact checking of claims</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <conf-name>2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name>
          <conf-date>Nov 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D19-1475"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1475</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>WY</given-names>
            </name>
          </person-group>
          <article-title>"Liar, Liar Pants on Fire": a new benchmark dataset for fake news detection</article-title>
          <source>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</source>
          <year>2017</year>
          <conf-name>55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</conf-name>
          <conf-date>Jul 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P17-2067"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p17-2067</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rashkin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Volkova</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Truth of varying shades: analyzing language in fake news and political fact-checking</article-title>
          <source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2017</year>
          <conf-name>2017 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>Sep 2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d17-1317</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tchechmedjiev</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fafalios</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Boland</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gasquet</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zloch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zapilko</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Dietze</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Todorov</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>ClaimsKG: a knowledge graph of fact-checked claims</article-title>
          <source>The Semantic Web – ISWC 2019</source>
          <year>2019</year>
          <month>10</month>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Arslan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tremayne</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Toward automated fact-checking: detecting check-worthy factual claims by ClaimBuster</article-title>
          <source>Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2017</year>
          <conf-name>23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>Aug 13-17, 2017</conf-date>
          <conf-loc>Halifax, NS, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3097983.3098131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karmakharm</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Aletras</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bontcheva</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Journalist-in-the-loop: continuous learning as a service for rumour analysis</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations</source>
          <year>2019</year>
          <conf-name>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations</conf-name>
          <conf-date>Nov 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d19-3020</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Samuel</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zaïane</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>MedFact: towards improving veracity of medical information in social media using applied machine learning</article-title>
          <source>Advances in Artificial Intelligence</source>
          <year>2018</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yilmaz</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Cross-domain modeling of sentence-level evidence for document retrieval</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <conf-name>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name>
          <conf-date>Nov, 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D19-1352"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Cross-lingual passage re-ranking with alignment augmented multilingual BERT</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <month>12</month>
          <day>1</day>
          <volume>8</volume>
          <fpage>213232</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3041605</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghenai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mejova</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Catching Zika fever: application of crowdsourcing and machine learning for tracking health misinformation on Twitter</article-title>
          <source>Proceedings of the 2017 IEEE International Conference on Healthcare Informatics (ICHI)</source>
          <year>2017</year>
          <conf-name>IEEE International Conference on Healthcare Informatics (ICHI)</conf-name>
          <conf-date>Aug 23-26, 2017</conf-date>
          <conf-loc>Park City, UT, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ichi.2017.58</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shepperd</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Charnock</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Why DISCERN?</article-title>
          <source>Health Expect</source>
          <year>1998</year>
          <month>11</month>
          <day>04</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>134</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/11281867"/>
          </comment>
          <pub-id pub-id-type="doi">10.1046/j.1369-6513.1998.0112a.x</pub-id>
          <pub-id pub-id-type="medline">11281867</pub-id>
          <pub-id pub-id-type="pii">hex112a</pub-id>
          <pub-id pub-id-type="pmcid">PMC5139898</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moult</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Franck</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Brady</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Ensuring quality information for patients: development and preliminary validation of a new instrument to improve the quality of written health care information</article-title>
          <source>Health Expect</source>
          <year>2004</year>
          <month>06</month>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>165</fpage>
          <lpage>75</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1111/j.1369-7625.2004.00273.x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1369-7625.2004.00273.x</pub-id>
          <pub-id pub-id-type="medline">15117391</pub-id>
          <pub-id pub-id-type="pii">HEX273</pub-id>
          <pub-id pub-id-type="pmcid">PMC5060233</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bunge</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mühlhauser</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Steckelberg</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>What constitutes evidence-based patient information? Overview of discussed criteria</article-title>
          <source>Patient Educ Couns</source>
          <year>2010</year>
          <month>03</month>
          <volume>78</volume>
          <issue>3</issue>
          <fpage>316</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pec.2009.10.029</pub-id>
          <pub-id pub-id-type="medline">20005067</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(09)00519-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Working Group GPGI</collab>
          </person-group>
          <article-title>Good practice guidelines for health information</article-title>
          <source>Zeitschrift für Evidenz, Fortbildung und Qualität im Gesundheitswesen</source>
          <year>2016</year>
          <volume>110-111</volume>
          <fpage>e1</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.zefq-journal.com/article/S1865-9217(16)30024-1/fulltext"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.zefq.2016.01.004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Keselman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Murcko</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the quality of health information in a changing digital ecosystem</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>02</month>
          <day>08</day>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>e11129</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/2/e11129/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/11129</pub-id>
          <pub-id pub-id-type="medline">30735144</pub-id>
          <pub-id pub-id-type="pii">v21i2e11129</pub-id>
          <pub-id pub-id-type="pmcid">PMC6384537</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marinelli</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Cervone</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tortoreto</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stepanov</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Di Fabbrizio</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Riccardi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Active annotation: bootstrapping annotation lexicon and guidelines for supervised NLU learning</article-title>
          <source>Proc Interspeech</source>
          <year>2019</year>
          <fpage>574</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.21437/interspeech.2019-2537</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>Proceedings of the 26th International Conference on Neural Information Processing Systems</source>
          <year>2013</year>
          <conf-name>Proceedings of the 26th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>Dec 5 - 10, 2013</conf-date>
          <conf-loc>Lake Tahoe, Nevada</conf-loc>
          <pub-id pub-id-type="doi">10.5555/2999792.2999959</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>GloVe: global vectors for word representation</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <year>2014</year>
          <conf-name>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>Oct, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D14-1162"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Dependency-based word embeddings</article-title>
          <source>Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</source>
          <year>2014</year>
          <conf-name>Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</conf-name>
          <conf-date>Jun, 2014</conf-date>
          <conf-loc>Baltimore, Maryland</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P14-2050"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/p14-2050</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nickel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kiela</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Poincaré embeddings for learning hierarchical representations</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1705.08039">https://arxiv.org/abs/1705.08039</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bojanowski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Enriching word vectors with subword information</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <month>12</month>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1607.04606">https://arxiv.org/abs/1607.04606</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van Merriënboer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gulcehre</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bahdanau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bougares</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schwenk</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Learning phrase representations using RNN encoder-decoder for statistical machine translation</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <year>2014</year>
          <conf-name>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>Oct, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D14-1179"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1706.03762">https://arxiv.org/abs/1706.03762</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Limtiaco</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>St. John</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Constant</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Guajardo-Cespedes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tar</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sung</surname>
              <given-names>Y-H</given-names>
            </name>
            <name name-style="western">
              <surname>Strope</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kurzweil</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Universal sentence encoder</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1803.11175">https://arxiv.org/abs/1803.11175</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ryder</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Subbiah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dhariwal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Neelakantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shyam</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sastry</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Askell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Herbert-Voss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Henighan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Child</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ramesh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ziegler</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Winter</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sigler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Litwin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chess</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McCandlish</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Language models are few-shot learners</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2005.14165">https://arxiv.org/abs/2005.14165</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M-W</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>ArXiv.org</source>
          <year>2018</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1810.04805">https://arxiv.org/abs/1810.04805</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reimers</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentence-BERT: sentence embeddings using siamese BERT-networks</article-title>
          <source>ArXiv.org</source>
          <year>2019</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1908.10084">https://arxiv.org/abs/1908.10084</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chicco</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Siamese neural networks: an overview</article-title>
          <source>Methods Mol Biol</source>
          <year>2021</year>
          <volume>2190</volume>
          <fpage>73</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1007/978-1-0716-0826-5_3</pub-id>
          <pub-id pub-id-type="medline">32804361</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <article-title>Sentence (linguistics)</article-title>
          <source>Wikipedia</source>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://en.wikipedia.org/wiki/Sentence_(linguistics)">https://en.wikipedia.org/wiki/Sentence_(linguistics)</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lorenz-Spreen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mønsted</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Hövel</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Accelerating dynamics of collective attention</article-title>
          <source>Nat Commun</source>
          <year>2019</year>
          <month>04</month>
          <day>15</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>1759</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-019-09311-w"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-019-09311-w</pub-id>
          <pub-id pub-id-type="medline">30988286</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-019-09311-w</pub-id>
          <pub-id pub-id-type="pmcid">PMC6465266</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nabożny</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Balcerzak</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Koržinek</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Enriching the context: methods of improving the non-contextual assessment of sentence credibility</article-title>
          <source>Web Information Systems Engineering – WISE 2019</source>
          <year>2019</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krishna</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Murty</surname>
              <given-names>MN</given-names>
            </name>
          </person-group>
          <article-title>Genetic K-means algorithm</article-title>
          <source>IEEE Trans Syst Man Cybern B Cybern</source>
          <year>1999</year>
          <volume>29</volume>
          <issue>3</issue>
          <fpage>433</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1109/3477.764879</pub-id>
          <pub-id pub-id-type="medline">18252317</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Research on K-Value selection method of K-means clustering algorithm</article-title>
          <source>J</source>
          <year>2019</year>
          <month>06</month>
          <day>18</day>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>226</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.3390/j2020016</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: a robustly optimized BERT pretraining approach</article-title>
          <source>ArXiv.org</source>
          <year>2019</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692">https://arxiv.org/abs/1907.11692</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raunak</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Simple and effective dimensionality reduction for word embeddings</article-title>
          <source>ArXiv.org</source>
          <year>2017</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tinyurl.com/vf29aah8">https://tinyurl.com/vf29aah8</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bhat</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Viswanath</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>All-but-the-top: simple and effective postprocessing for word representations</article-title>
          <source>ArXiv.org</source>
          <year>2017</year>
          <access-date>2021-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1702.01417">https://arxiv.org/abs/1702.01417</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhmoud</surname>
              <given-names>EN</given-names>
            </name>
            <name name-style="western">
              <surname>Barazi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Fahmi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Abdu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Higazy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Elhajj</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Critical appraisal of the clinical practice guidelines for the management of dyslipidaemias: lipid modification to reduce cardiovascular risk—European Society of Cardiology (ESC) and European Atherosclerosis Society (ESC/EAS) 2019 guidelines</article-title>
          <source>J Pharm Health Serv Res</source>
          <year>2020</year>
          <month>11</month>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>423</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1111/jphs.12371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferri</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Corsini</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Clinical pharmacology of statins: an update</article-title>
          <source>Curr Atheroscler Rep</source>
          <year>2020</year>
          <month>06</month>
          <day>03</day>
          <volume>22</volume>
          <issue>7</issue>
          <fpage>26</fpage>
          <pub-id pub-id-type="doi">10.1007/s11883-020-00844-w</pub-id>
          <pub-id pub-id-type="medline">32494971</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11883-020-00844-w</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pergolizzi Jr</surname>
              <given-names>JV</given-names>
            </name>
            <name name-style="western">
              <surname>Coluzzi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Colucci</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Olsson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>LeQuang</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Saadi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Magnusson</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Statins and muscle pain</article-title>
          <source>Expert Rev Clin Pharmacol</source>
          <year>2020</year>
          <month>03</month>
          <day>27</day>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>299</fpage>
          <lpage>310</lpage>
          <pub-id pub-id-type="doi">10.1080/17512433.2020.1734451</pub-id>
          <pub-id pub-id-type="medline">32089020</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yandrapalli</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Malik</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Guber</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rochlani</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pemmasani</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jasti</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aronow</surname>
              <given-names>WS</given-names>
            </name>
          </person-group>
          <article-title>Statins and the potential for higher diabetes mellitus risk</article-title>
          <source>Expert Rev Clin Pharmacol</source>
          <year>2019</year>
          <month>08</month>
          <day>31</day>
          <volume>12</volume>
          <issue>9</issue>
          <fpage>825</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1080/17512433.2019.1659133</pub-id>
          <pub-id pub-id-type="medline">31474169</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McGuinness</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Craig</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bullock</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Passmore</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Statins for the prevention of dementia</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2016</year>
          <month>01</month>
          <day>04</day>
          <issue>1</issue>
          <fpage>CD003160</fpage>
          <pub-id pub-id-type="doi">10.1002/14651858.CD003160.pub3</pub-id>
          <pub-id pub-id-type="medline">26727124</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shrestha</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mulmi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Munankarmi</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Statins and abnormal liver enzymes</article-title>
          <source>S D Med</source>
          <year>2019</year>
          <month>01</month>
          <volume>72</volume>
          <issue>1</issue>
          <fpage>12</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="medline">30849222</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Whayne</surname>
              <given-names>TF</given-names>
            </name>
          </person-group>
          <article-title>Coenzyme Q-10 in human health: supporting evidence?</article-title>
          <source>South Med J</source>
          <year>2016</year>
          <month>01</month>
          <volume>109</volume>
          <issue>1</issue>
          <fpage>17</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.14423/SMJ.0000000000000393</pub-id>
          <pub-id pub-id-type="medline">26741866</pub-id>
          <pub-id pub-id-type="pii">SMJ50123</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Cholesterol Treatment Trialists’ (CTT) Collaboration</collab>
            <name name-style="western">
              <surname>Baigent</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Blackwell</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Emberson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Reith</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bhala</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Peto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Keech</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Simes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Efficacy and safety of more intensive lowering of LDL cholesterol: a meta-analysis of data from 170,000 participants in 26 randomised trials</article-title>
          <source>Lancet</source>
          <year>2010</year>
          <month>11</month>
          <day>13</day>
          <volume>376</volume>
          <issue>9753</issue>
          <fpage>1670</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(10)61350-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(10)61350-5</pub-id>
          <pub-id pub-id-type="medline">21067804</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(10)61350-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC2988224</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gwizdka</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Trace</surname>
              <given-names>CB</given-names>
            </name>
          </person-group>
          <article-title>Consumer evaluation of the quality of online health information: systematic literature review of relevant criteria and indicators</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>05</month>
          <day>02</day>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>e12522</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/5/e12522/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12522</pub-id>
          <pub-id pub-id-type="medline">31045507</pub-id>
          <pub-id pub-id-type="pii">v21i5e12522</pub-id>
          <pub-id pub-id-type="pmcid">PMC6521213</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bender</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Koller</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Climbing towards NLU: on meaning, form, and understanding in the age of data</article-title>
          <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2020</year>
          <conf-name>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>Jul, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <fpage>5185</fpage>
          <lpage>98</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.463</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
