<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i11e30467</article-id>
      <article-id pub-id-type="pmid">34623954</article-id>
      <article-id pub-id-type="doi">10.2196/30467</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>The Evolution of Rumors on a Closed Social Networking Platform During COVID-19: Algorithm Development and Content Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhang</surname>
            <given-names>Chunyan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lalmuanawma</surname>
            <given-names>Samuel</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Andrea W</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6592-1132</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Lan</surname>
            <given-names>Jo-Yu</given-names>
          </name>
          <degrees>BEng</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1628-1903</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Ming-Hung</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5680-4003</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Chihhao</given-names>
          </name>
          <degrees>MFA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Information Operations Research Group</institution>
            <addr-line>7F-13, No. 103, Sec. 1, Fuxing S. Rd., Da’an Dist.</addr-line>
            <addr-line>Taipei, 106</addr-line>
            <country>Taiwan</country>
            <phone>886 933 263 989</phone>
            <email>chihhao@iorg.tw</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0939-2111</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Information Operations Research Group</institution>
        <addr-line>Taipei</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Information Engineering and Computer Science</institution>
        <institution>Feng Chia University</institution>
        <addr-line>Taichung</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Computer Science and Information Engineering</institution>
        <institution>National Chung Cheng University</institution>
        <addr-line>Chiayi</addr-line>
        <country>Taiwan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Chihhao Yu <email>chihhao@iorg.tw</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>23</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>11</issue>
      <elocation-id>e30467</elocation-id>
      <history>
        <date date-type="received">
          <day>21</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>7</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>10</day>
          <month>9</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Andrea W Wang, Jo-Yu Lan, Ming-Hung Wang, Chihhao Yu. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 23.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/11/e30467" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In 2020, the COVID-19 pandemic put the world in a crisis regarding both physical and psychological health. Simultaneously, a myriad of unverified information flowed on social media and online outlets. The situation was so severe that the World Health Organization identified it as an infodemic in February 2020.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to examine the propagation patterns and textual transformation of COVID-19–related rumors on a closed social media platform.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We obtained a data set of suspicious text messages collected on Taiwan’s most popular instant messaging platform, LINE, between January and July 2020. We proposed a classification-based clustering algorithm that could efficiently cluster messages into groups, with each group representing a rumor. For ease of understanding, a group is referred to as a “rumor group.” Messages in a rumor group could be identical or could have limited textual differences between them. Therefore, each message in a rumor group is a form of the rumor.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 936 rumor groups with at least 10 messages each were discovered among 114,124 text messages collected from LINE. Among 936 rumors, 396 (42.3%) were related to COVID-19. Of the 396 COVID-19–related rumors, 134 (33.8%) had been fact-checked by the International Fact-Checking Network–certified agencies in Taiwan and determined to be false or misleading. By studying the prevalence of simplified Chinese characters or phrases in the messages that originated in China, we found that COVID-19–related messages, compared to non–COVID-19–related messages, were more likely to have been written by non-Taiwanese users. The association was statistically significant, with <italic>P</italic>&#60;.001, as determined by the chi-square independence test. The qualitative investigations of the three most popular COVID-19 rumors revealed that key authoritative figures, mostly medical personnel, were often misquoted in the messages. In addition, these rumors resurfaced multiple times after being fact-checked, usually preceded by major societal events or textual transformations.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>To fight the infodemic, it is crucial that we first understand why and how a rumor becomes popular. While social media has given rise to an unprecedented number of unverified rumors, it also provides a unique opportunity for us to study the propagation of rumors and their interactions with society. Therefore, we must put more effort into these areas.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>rumors</kwd>
        <kwd>rumor diffusion</kwd>
        <kwd>rumor propagation</kwd>
        <kwd>social listening</kwd>
        <kwd>infodemic</kwd>
        <kwd>social media</kwd>
        <kwd>closed platform</kwd>
        <kwd>natural language processing</kwd>
        <kwd>machine learning</kwd>
        <kwd>unsupervised learning</kwd>
        <kwd>computers and society</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Online social media has democratized content. By creating a direct path from content producers to consumers, the power of production and sharing of information has been redistributed from limited parties to general populations. However, social media platforms have also given rise to the proliferation of misinformation and enabled the fast dissemination of unverified rumors [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. In 2020, the COVID-19 pandemic put the world in a crisis regarding both physical and psychological health. A myriad of unverified information flowed on social media. Rumors and claims of erroneous health practices even interfered with the control of COVID-19 in various parts of the world [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. The World Health Organization (WHO) identified this situation as an infodemic in February 2020 [<xref ref-type="bibr" rid="ref6">6</xref>], indicating its seriousness.</p>
      <p>Previous studies revealed that people relied on social media to gather COVID-19 information and guidelines [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Efforts have, thus, been put into studies examining true and false rumors on social media [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. For example, Cinelli et al [<xref ref-type="bibr" rid="ref9">9</xref>] compared feedback to reliable and questionable COVID-19 information across five platforms, including Twitter, YouTube, and Gab. Gallotti et al [<xref ref-type="bibr" rid="ref10">10</xref>] looked at how much unreliable COVID-19 information Twitter users were exposed to across countries.</p>
      <p>Machine learning and deep learning techniques have been employed to study COVID-19 posts on social media, with much of the focus on topic modeling, sentiment analysis, and misinformation detection [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Both sentiment analysis and misinformation detection are supervised classification problems. Many studies have employed the Valence Aware Dictionary and Sentiment Reasoner (VADER) model or long short-term memory (LSTM) for sentiment analysis and ensemble machine learning models, such as Extreme Gradient Boosting (XGBoost), for misinformation detection [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Topic modeling, on the other hand, is an unsupervised clustering method. Among topic modeling studies, latent Dirichlet allocation (LDA) was the most widely used algorithm [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref24">24</xref>], and other favorites included k-means clustering [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. For example, Chandrasekaran et al [<xref ref-type="bibr" rid="ref15">15</xref>] utilized LDA to extract 26 topics among 13.9 million English COVID-19 Twitter posts. Then they adopted the VADER model to compute sentiment scores for each topic. Jelodar et al [<xref ref-type="bibr" rid="ref19">19</xref>] employed LDA to extract topics from 560,000 COVID-19 Twitter posts and then used the LSTM neural network to identify the sentiments of the posts. Kwok et al [<xref ref-type="bibr" rid="ref21">21</xref>] employed LDA to extract topics and Stanford University’s CoreNLP (natural language processing) to study the sentiments of Twitter posts regarding COVID-19 vaccinations from Australian Twitter accounts. Also, Chen et al [<xref ref-type="bibr" rid="ref16">16</xref>] compared the COVID-19 discussions on Twitter and Weibo using t-distributed stochastic neighbor embedding dimensionality reduction with the k-means clustering algorithm to extract topics.</p>
      <p>Despite the instructive knowledge provided by the aforementioned machine learning studies, there are two identifiable gaps. First, most studies concentrated on <italic>public</italic> social media platforms, with the majority using Twitter as the data source [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Investigations on closed social media platforms, such as WhatsApp, WeChat, Telegram, or LINE, remain extremely scarce. Second, most studies looked at posts via their high-level theme, such as “misconceptions and complaints about COVID-19 control” [<xref ref-type="bibr" rid="ref21">21</xref>], “psychological stress” [<xref ref-type="bibr" rid="ref17">17</xref>], or “government response” [<xref ref-type="bibr" rid="ref15">15</xref>]. There were limited efforts put into the study of individual narratives or rumors under a high-level theme, for example, rumors such as “protect yourself from coronavirus by putting bleach in your body” and “check for COVID-19 by holding your breath for 10 seconds or longer” under the theme of “erroneous health practices.”</p>
      <p>While high-level themes and sentiments can give us an overview of the public discourse, the capability to efficiently identify individual narratives would be extremely helpful for picking up trending rumors and claims. Discussions on social media platforms are most likely not independent from each other. Thus, simply looking at billions of individual messages is not effective for identifying what rumors are receiving attention. Therefore, there is an apparent need for an efficient way to group and extract the narratives to recognize the popular ones.</p>
      <p>Recognizing the limitations of previous studies and seeking to solve the aforementioned problem, we aimed to use machine learning to identify individual COVID-19 rumors from a pool of social media messages, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. After identifying the rumors, we then investigated the propagation patterns and textual transformation of those rumors on a closed platform. To achieve this, we proposed a classification-based clustering algorithm to efficiently group tens of thousands of messages according to the similarity of messages. Then, we applied the algorithm to the suspicious messages on LINE, a popular messaging platform in Taiwan. Furthermore, according to the clustering results, we investigated how the messages evolved from temporal and cultural perspectives during the pandemic. To the best of our knowledge, this is the first study to examine COVID-19 rumor diffusion on a closed platform.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>A graphical depiction of this study’s goal in using machine learning to extract individual rumors from a pool of social media messages. MOHW: Ministry of Health and Welfare.</p>
        </caption>
        <graphic xlink:href="medinform_v9i11e30467_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection</title>
        <p>LINE is an instant messaging platform. According to the 2018 Taiwan Communication Survey (TCS), 98.5% of people in Taiwan used LINE as their primary messaging tool [<xref ref-type="bibr" rid="ref26">26</xref>], making it the most popular closed messaging platform. In light of the increasing amount of unreliable information being exchanged through LINE, fact-checking agencies or groups, such as the Taiwan FactCheck Center, Cofacts, or MyGoPen, have developed LINE chatbots for users to voluntarily forward suspicious messages. These chatbots archive the messages and check them against their existing databases to reply with the fact-checked results.</p>
        <p>We obtained a data set of suspicious messages forwarded by LINE users to a fact-checking LINE bot between January and July 2020. The data set included messages related to COVID-19 as well as other topics.</p>
        <p>Along with the text content of each reported message, we also obtained the report time of each message and a unique identifier for the LINE user that reported the message. The user identifiers we received were scrambled; therefore, it was not possible for us to use the identifiers to attribute any reported message back to any actual LINE user.</p>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <p>After obtaining the text messages, we preprocessed them using the following steps. First, we removed all characters that were neither simplified Chinese nor traditional Chinese. Second, we tokenized each message using the Jieba library [<xref ref-type="bibr" rid="ref27">27</xref>] in Python (version 3.7; Python Software Foundation) and then removed tokens that were Chinese stop words from the token list. To focus on longer messages, we only kept messages with at least 20 tokens from our data set. Finally, the CountVectorizer module from Python’s scikit-learn package [<xref ref-type="bibr" rid="ref28">28</xref>] was used to create a binary word vector for each message.</p>
      </sec>
      <sec>
        <title>Clustering Messages Into Rumor Groups by the Classification-Based Clustering Algorithm</title>
        <p>In order to determine what messages belonged to the same rumor, we needed to define distance between messages. We wanted two messages, A and B, to be close to each other if the overlapping text between the two constituted the majority of both messages. When the overlapping text makes up the majority of A but not B, it signals that message A only constitutes a portion of B, meaning that B is likely a combination of several other rumors. In this situation, A and B should be in different groups; therefore, we would like the distance between them to be larger. Based on this idea, we defined the distance between two messages, A and B, to be as follows:</p>
        <graphic xlink:href="medinform_v9i11e30467_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>where <italic>tok</italic>(·) is the set of tokens of one message and &#124;·&#124; denotes the number of elements in a set.</p>
        <p>While most work relied on the LDA or k-means algorithm to separate messages into groups, both algorithms required a predefined number of final groups. That is, the users need to tell the algorithm how many groups to separate the messages into before it is applied. Because what we wanted to discover was how many narratives, or rumors, there were in all the messages by comparing the distance (equation 1) among all messages, such a requirement contradicted our needs. Hierarchical agglomerative clustering (HAC), on the other hand, starts by merging messages closer to one another into clusters and then iteratively merging closer clusters together until the distance between each cluster exceeds a predefined threshold. That is, instead of predefining a specific number of final groups like in LDA or k-means clustering, HAC determines the number of groups based on a predefined distance threshold. In addition, HAC has the advantage of accepting self-defined distance metrics. Therefore, HAC was the clustering algorithm that fitted our needs.</p>
        <p>However, HAC can be quite slow and memory consuming. It suffers with large data sets, especially in the case of social media messages. Therefore, we devised a classification-based clustering algorithm, one that combined the k-nearest neighbors (KNN) algorithm with HAC, to efficiently perform the clustering task. The idea was to randomly select a portion of messages on which to perform HAC; the result was then used to train a KNN algorithm. The trained KNN algorithm was subsequently used to predict the labels of the rest of the messages. A detailed algorithm is outlined in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>, and a flowchart of the algorithm is presented in <xref rid="figure2" ref-type="fig">Figure 2</xref>. The experimentation details are outlined in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, and we demonstrate the efficiency and effectiveness of this algorithm in the following subsection. The algorithm was implemented with the KNeighborsClassifier and AgglomerativeClustering modules from the Python library scikit-learn [<xref ref-type="bibr" rid="ref28">28</xref>]. The library gensim (version 3.8.3) [<xref ref-type="bibr" rid="ref29">29</xref>] was also used in experiments to implement the LDA model for comparisons. We released the code to implement the model in a GitHub repository [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
        <boxed-text id="box1" position="float">
          <title>The classification-based clustering algorithm (hierarchical agglomerative clustering plus k-nearest neighbors algorithm).</title>
          <p>
            <bold>Notation:</bold>
          </p>
          <list list-type="order">
            <list-item>
              <p><italic>(A)<sub>j</sub></italic>: <italic>j</italic><sup>th</sup> element of set <italic>A</italic>.</p>
            </list-item>
          </list>
          <p>
            <bold>Input:</bold>
          </p>
          <list list-type="order">
            <list-item>
              <p><italic>D</italic>: the set of all documents to be grouped.</p>
            </list-item>
            <list-item>
              <p><italic>D<sup>T</sup></italic>: the set of tokenized documents, in the same order as in <italic>D</italic>.</p>
            </list-item>
            <list-item>
              <p>Train portion <italic>u</italic>: a number &#62;0 and ≤1.</p>
            </list-item>
            <list-item>
              <p>Distance threshold λ: a number &#62;0 and ≤1. Throughout this paper, we set λ=0.6.</p>
            </list-item>
          </list>
          <p>
            <bold>Algorithm:</bold>
          </p>
          <list list-type="order">
            <list-item>
              <p>Select <italic>u</italic> × &#124;<italic>D<sup>T</sup></italic>&#124; elements from <italic>D<sup>T</sup></italic>, denoted as <italic>D<sup>T</sup><sub>u</sub></italic>, and the rest not selected as set <italic>D<sup>T</sup><sub>v</sub></italic>.</p>
            </list-item>
            <list-item>
              <p>Construct distance matrix <italic>M</italic> for <italic>D<sup>T</sup><sub>u</sub></italic>, where <italic>M<sub>ij</sub></italic> = <italic>d</italic>((<italic>D<sup>T</sup><sub>u</sub></italic>)<italic><sub>i</sub></italic>, (<italic>D<sup>T</sup><sub>u</sub></italic>)<italic><sub>j</sub></italic>) by equation 1. Note that <italic>M</italic> is symmetric.</p>
            </list-item>
            <list-item>
              <p>Feed <italic>M</italic> into hierarchical clustering with a distance threshold of λ. We will get back a sequence of labels <italic>L<sub>u</sub></italic>, where (<italic>L<sub>u</sub></italic>)<italic><sub>i</sub></italic> is the label of element (<italic>D<sup>T</sup><sub>u</sub></italic>)<italic><sub>i</sub></italic>. Elements with the same label are in the same cluster. Since the labels themselves do not carry meaning, remap them so they are all nonnegative whole numbers.</p>
            </list-item>
            <list-item>
              <p>For each unique label <italic>x</italic> in <italic>L<sub>u</sub></italic>, if &#124;{ <italic>k</italic> ∈ <italic>L<sub>u</sub></italic> &#124; <italic>k</italic> = <italic>x</italic> }&#124; = 1, then replace the value of <italic>x</italic> with −1. Denote the updated label set as <italic>L’<sub>u</sub></italic>.</p>
            </list-item>
            <list-item>
              <p>Train a k-nearest neighbors classifier <italic>K</italic> using the training set (<italic>D<sup>T</sup><sub>u</sub></italic>, <italic>L’<sub>u</sub></italic>). Then use <italic>K</italic> to predict the labels of <italic>D<sup>T</sup><sub>v</sub></italic>. Denote the prediction as <italic>L<sub>v</sub></italic>.</p>
            </list-item>
            <list-item>
              <p>Construct <italic>L</italic> by combining <italic>L’<sub>u</sub></italic> and <italic>L<sub>v</sub></italic>, where (<italic>L</italic>)<italic><sub>i</sub></italic> is the label of (<italic>D<sup>T</sup></italic>)<italic><sub>i</sub></italic>.</p>
            </list-item>
            <list-item>
              <p>Construct <italic>D<sup>T</sup><sub>o</sub></italic> = {<italic>d<sub>i</sub></italic> ∈ <italic>D<sup>T</sup></italic> &#124; Label (<italic>d<sub>i</sub></italic>) = −1}.</p>
            </list-item>
            <list-item>
              <p>Redo steps 2 and 3 for <italic>D<sup>T</sup><sub>o</sub></italic>. Denote the output as <italic>L<sub>o</sub></italic>. Make sure the values of <italic>L<sub>o</sub></italic> do not overlap with the values of <italic>L</italic> from step 6.</p>
            </list-item>
            <list-item>
              <p>Update <italic>L</italic> from step 6 with <italic>L<sub>o</sub></italic>.</p>
            </list-item>
          </list>
          <p>
            <bold>Algorithm output:</bold>
          </p>
          <p>A list of labels <italic>L</italic>, where (<italic>L</italic>)<italic><sub>i</sub></italic> denotes the label of document (<italic>D</italic>)<italic><sub>i</sub></italic>. Note that the value of the label itself does not carry any meaning. However, elements in <italic>D<sup>T</sup></italic> with the same label belong to the same group.</p>
        </boxed-text>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Flowchart for the classification-based clustering algorithm (hierarchical agglomerative clustering + k-nearest neighbors algorithm).</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e30467_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Comparing the Classification-Based Clustering Algorithm With Other Popular Algorithms</title>
        <p>From <xref rid="figure3" ref-type="fig">Figure 3</xref>, we can see that the classification-based clustering algorithm, the HAC+KNN model, greatly reduced the runtime compared to using only HAC, especially when the train portion value <italic>u</italic> was less than 0.60. Furthermore, such a significant gain in speed did not compromise the clustering results. With the HAC model’s results as the gold standard to compare with, the precision values (<xref rid="figure4" ref-type="fig">Figure 4</xref>), recall values (<xref rid="figure5" ref-type="fig">Figure 5</xref>), and <italic>F</italic> scores (<xref rid="figure6" ref-type="fig">Figure 6</xref>) from the HAC+KNN model remained greater than 99% when the train portion <italic>u</italic> was not lower than 0.40. The results demonstrated that the HAC+KNN model’s assignments of groups were complete, as measured by recall, and the use of KNN did not introduce too many errors in each group, as measured by precision.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Speed comparison with 95% CIs between HAC and HAC+KNN across different levels of train portion <italic>u</italic>. HAC: hierarchical agglomerative clustering; KNN: k-nearest neighbors.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e30467_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Precision values and 95% CIs (whiskers) of the HAC+KNN algorithm across different data set sizes and train portion <italic>u</italic>. HAC: hierarchical agglomerative clustering; KNN: k-nearest neighbors.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e30467_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Recall values and 95% CIs (whiskers) of the HAC+KNN algorithm across different data set sizes and train portion <italic>u</italic>. HAC: hierarchical agglomerative clustering; KNN: k-nearest neighbors.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e30467_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p><italic>F</italic> scores and 95% CIs (whiskers) of the HAC+KNN algorithm across different data set sizes and train portion <italic>u</italic>. HAC: hierarchical agglomerative clustering; KNN: k-nearest neighbors.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e30467_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>We observed that the runtime of the k-means clustering was 10 times slower than that of the HAC algorithm, and the LDA model’s runtime was the slowest among all models (<xref ref-type="table" rid="table1">Table 1</xref>). In addition, the precision of the LDA model was very low, meaning that predicted groups had many false positives. While the precision of the k-means model was comparable to that of the HAC+KNN model, recall was only 73%. This showed that the k-means model missed out on many messages.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Performance comparisons between HAC, HAC+KNN, LDA, and k-means models for data sets with 10,000 messages.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="200"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Runtime (seconds), mean (SD)</td>
                <td>Precision, mean (SD)</td>
                <td>Recall, mean (SD)</td>
                <td><italic>F</italic> score, mean (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>HAC<sup>a</sup></td>
                <td>6.594 (0.245)</td>
                <td>N/A<sup>b</sup></td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>HAC+KNN<sup>c</sup> (<italic>u</italic>=0.2)</td>
                <td>2.172 (0.097)</td>
                <td>0.993 (0.003)</td>
                <td>0.982 (0.005)</td>
                <td>0.986 (0.004)</td>
              </tr>
              <tr valign="top">
                <td>HAC+KNN (<italic>u</italic>=0.4)</td>
                <td>2.502 (0.023)</td>
                <td>0.995 (0.001)</td>
                <td>0.996 (0.002)</td>
                <td>0.995 (0.001)</td>
              </tr>
              <tr valign="top">
                <td>HAC+KNN (<italic>u</italic>=0.6)</td>
                <td>3.418 (0.071)</td>
                <td>0.997 (0.001)</td>
                <td>0.998 (0.001)</td>
                <td>0.997 (0.001)</td>
              </tr>
              <tr valign="top">
                <td>HAC+KNN (<italic>u</italic>=0.8)</td>
                <td>4.697 (0.146)</td>
                <td>0.998 (0.001)</td>
                <td>0.999 (0.001)</td>
                <td>0.999 (0.001)</td>
              </tr>
              <tr valign="top">
                <td>LDA<sup>d</sup></td>
                <td>1788.981 (62.444)</td>
                <td>0.624 (0.029)</td>
                <td>0.939 (0.006)</td>
                <td>0.704 (0.023)</td>
              </tr>
              <tr valign="top">
                <td>K-means</td>
                <td>41.143 (1.334)</td>
                <td>0.993 (0.002)</td>
                <td>0.734 (0.011)</td>
                <td>0.823 (0.010)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>HAC: hierarchical agglomerative clustering.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>N/A: not applicable, because model does not include the parameter <italic>u</italic>.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>KNN: k-nearest neighbors.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>LDA: latent Dirichlet allocation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Determining Whether a Rumor Is Related to COVID-19</title>
        <p>A rumor group contains many messages. To determine if a rumor group is related to COVID-19, we first identified how many messages in the group contained any of the COVID-19 keywords from the list that we put together (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>). Next, rumor groups with more than 60% of messages containing COVID-19–related keywords were passed to the authors to decide if such a rumor was really about COVID-19. If a rumor was deemed COVID-19–related, then all messages in the group were also deemed COVID-19–related, regardless of whether that message itself contained the keywords. Recognizing COVID-19–relatedness by close neighbors of each message is a more inclusive approach, as there were messages without the keywords that were obviously related to the pandemic; see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> as an example.</p>
        <boxed-text id="box2" position="float">
          <title>A list of 33 COVID-19–related keywords.</title>
          <p>指揮中心, 奎寧, 急性呼吸道感染, 新型病毒, 疫情, 口罩, 負壓, 抗疫, 陽性, 新型冠狀病毒, 潛伏期, 李文亮, 纖維化, 自主管理, 群聚, 隔離, 確診, 武漢, 譚德塞, 陰性, 新冠, 染疫, 武肺, 封城, 肺炎, 自主健康管理, 防疫, 冠狀, 家庭感染, covid, ibuprofen, 2019-ncov, coronavirus</p>
        </boxed-text>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Set</title>
        <p>Our data set, after preprocessing, contained 114,124 messages. The character distribution is presented in <xref ref-type="table" rid="table2">Table 2</xref>, and the number of messages reported per date is shown in <xref rid="figure7" ref-type="fig">Figure 7</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Breakdown of characters in the data set of 114,124 suspicious messages.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Statistic</td>
                <td colspan="5">Type of character</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>All</td>
                <td>Chinese</td>
                <td>Digit</td>
                <td>Alphabetical</td>
                <td>Others<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Minimum, n</td>
                <td>24</td>
                <td>24</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Median (IQR)</td>
                <td>233 (333)</td>
                <td>145 (225)</td>
                <td>7 (17)</td>
                <td>2 (22)</td>
                <td>38 (79)</td>
              </tr>
              <tr valign="top">
                <td>Maximum, n</td>
                <td>10,012</td>
                <td>8132</td>
                <td>3252</td>
                <td>7014</td>
                <td>5532</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>This category includes characters such as punctuation marks and emojis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Distribution of 114,124 messages by report dates.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e30467_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Rumor Group Overview</title>
        <p>By using the HAC+KNN algorithm, 114,124 messages were separated into 12,260 rumor groups. A total of 8529 rumor groups had only 1 message. Therefore, the remaining 105,595 messages were separated into 3731 rumor groups. There were 936 rumor groups with at least 10 messages, with the largest one having 2546 messages. We present the statistics of the rumor group sizes in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Statistics of the rumor group sizes.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="360"/>
            <col width="150"/>
            <col width="120"/>
            <col width="140"/>
            <col width="140"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td>Minimum number of messages per rumor group</td>
                <td colspan="5">Messages</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mean (SD)</td>
                <td>Maximum, n</td>
                <td>3rd quartile, n</td>
                <td>2nd quartile, n</td>
                <td>Total, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>9.309 (71)</td>
                <td>2546</td>
                <td>2</td>
                <td>1</td>
                <td>114,124</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>28.302 (126.907)</td>
                <td>2546</td>
                <td>10</td>
                <td>3</td>
                <td>105,595</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>102.96 (238.31)</td>
                <td>2546</td>
                <td>75</td>
                <td>27</td>
                <td>96,373</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Among 936 rumor groups with at least 10 messages, we identified 396 (42.3%) that were related to COVID-19; these consisted of a total of 42,829 messages. Among 396 COVID-19–related rumor groups, 134 (33.8%) were deemed false or misleading by either the Taiwan FactCheck Center or MyGoPen, two International Fact-Checking Network (IFCN)–certified fact-checking agencies in Taiwan.</p>
        <p>After recognizing many messages containing simplified Chinese characters or phrases originating from China, we compared the prevalence of those characters and phrases between COVID-19–related and non–COVID-19–related messages. Compared to non–COVID-19–related messages, the pool of COVID-19–related messages had significantly more messages using simplified Chinese characters or phrases that originated from China (<xref ref-type="table" rid="table4">Table 4</xref>). The association was significant as determined by the chi-square independence test with Yates’ continuity correction (χ<sup>2</sup><sub>1</sub>=1088.0, n=96,373; <italic>P</italic>&#60;.001).</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Contingency table of COVID-19–relatedness versus the use of simplified Chinese characters or phrases originating from China.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="310"/>
            <col width="310"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Message type</td>
                <td colspan="2">Messages with simplified Chinese characters or phrases originating from China, n</td>
                <td>Total messages, n</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Yes</td>
                <td>No</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Related to COVID-19</td>
                <td>16,957</td>
                <td>25,872</td>
                <td>42,829</td>
              </tr>
              <tr valign="top">
                <td>Not related to COVID-19</td>
                <td>15,776</td>
                <td>37,768</td>
                <td>53,544</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>32,733</td>
                <td>63,640</td>
                <td>96,373</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>The COVID-19–related rumor group sizes had a very long-tailed distribution (<xref rid="figure8" ref-type="fig">Figure 8</xref>). Most of the rumor groups only contained a few messages. In fact, only 15 rumor groups contained more than 1000 messages. In the following subsection, we discuss how we qualitatively analyzed the three COVID-19 rumor groups with the largest number of messages.</p>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Empirical cumulative distribution function of the number of messages in COVID-19–related rumor groups.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e30467_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Case Studies of the Three Largest COVID-19–Related Rumor Groups</title>
        <sec>
          <title>Overview</title>
          <p>We qualitatively analyzed the three rumor groups with the largest number of messages among the 396 COVID-19–related rumor groups. In fact, a total of 7523 messages from the three rumor groups made up 17.6% of all 42,829 COVID-19–related messages.</p>
          <p>To study the interactions of the rumors’ popularity with society, we picked out some major societal events, as shown in <xref ref-type="table" rid="table5">Table 5</xref>. While there were multiple important events regarding the pandemic every day, we picked out incidents that were the first occurrences.</p>
          <table-wrap position="float" id="table5">
            <label>Table 5</label>
            <caption>
              <p>Major societal events related to COVID-19.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="190"/>
              <col width="810"/>
              <thead>
                <tr valign="top">
                  <td>Date (year 2020)</td>
                  <td>Events</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>February 9</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>First asymptomatic laboratory-confirmed COVID-19 case in Taiwan</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>February 15</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>First COVID-19 death case in Taiwan</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>February 21</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Passengers on Diamond Princess cruise ship returned to Taiwan</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>March 11</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>COVID-19 declared a global pandemic by the World Health Organization</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>March 18</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>The director of the CECC<sup>a</sup>, Chen Shih-Chung, went to the Legislative Yuan for interpellation about the pandemic for the first time</p>
                      </list-item>
                      <list-item>
                        <p>A total of 100 confirmed cases was reached; single-day confirmed cases hit record high for 3 consecutive days</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>March 26</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>The CECC released the first report on the analysis of confirmed cases in Taiwan</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>March 30</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>First death case in Taiwan’s first hospital cluster infection</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>April 1</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>The day before a 4-day long weekend</p>
                      </list-item>
                      <list-item>
                        <p>First day of mask requirement on public transportation</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>April 5</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>The last day of a 4-day long weekend</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table5fn1">
                <p><sup>a</sup>CECC: Central Epidemic Command Center.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Case 1: Do Not Go Outside!</title>
          <p>The rumor content for Case 1 is presented in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref>. This rumor first appeared in the data set on February 2, 2020. Over the course of 3.5 months, there were a total of 2119 messages reported. The reported messages went viral at least four times: they peaked on February 22 with 80 messages, they peaked on March 16 with 68 messages, they reached the highest number on April 2 with 205 messages, and they peaked on April 7 with 197 messages (<xref rid="figure9" ref-type="fig">Figure 9</xref>). During this period, we observed several content changes (<xref ref-type="table" rid="table6">Table 6</xref>).</p>
          <boxed-text id="box3" position="float">
            <title>Content of Case 1 rumor.</title>
            <p>English translation:</p>
            <p>Academician Zhong Nan-Shan emphasized again, “Do not go outside! At least wait until the Lantern Festival.” Be warned that even if cured, you would suffer the rest of your life. This is a plague worse than SARS. The side effects of the drugs are more severe. Even if there is special medicine, it could only save your life, nothing more. Think about your family before stepping outside...This is a war, not a game...No one is an outsider in this war...Please share it with others. By Zhong Nan-Shan.</p>
            <p>Original content:</p>
            <p>鐘南山院士再次強調：別出門，元宵後，再看疫情控制情況！警告：一旦染上，就算治癒了，後遺症也會拖累後半生！這場瘟疫比17年前的非典更嚴重，用的藥副作用更大。如果出了特效藥，也只能保命，僅此而已！出門前想想你的家人，別連累家人，能不出門就不出門，大家一起轉發吧！這是一場戰役，不是兒戲，收起你盲目的自信和僥倖心理，也收起你事不關己高高掛起的態度，在這場戰役中沒有局外人！在家！在家！在家！不要點贊！求轉發——鐘南山</p>
          </boxed-text>
          <fig id="figure9" position="float">
            <label>Figure 9</label>
            <caption>
              <p>The number of Case 1 (ie, "Do not go outside!") messages reported by date. The number peaked on April 2 with 205 messages, when messages started misquoting the Central Epidemic Command Center (CECC) director. There were also a large number of reports right after the 4-day long weekend, with 166 messages on April 6 and 197 messages on April 7. Refer to Table 5 for major societal events.</p>
            </caption>
            <graphic xlink:href="medinform_v9i11e30467_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <table-wrap position="float" id="table6">
            <label>Table 6</label>
            <caption>
              <p>Change log for Case 1 rumor content.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="140"/>
              <col width="370"/>
              <col width="490"/>
              <thead>
                <tr valign="top">
                  <td>Date (year 2020)</td>
                  <td colspan="2">English translation (original)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Previous content</td>
                  <td>New content</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>February 17</td>
                  <td>Academician Zhong Nan-Shan emphasized<break/>(鍾南山院士再次強調)</td>
                  <td>Pandemic expert from Mainland China, Academician Zhong Nan-Shan emphasized<break/>(大陸防疫專家鍾南山院士再次強調)</td>
                </tr>
                <tr valign="top">
                  <td>February 18</td>
                  <td>Academician Zhong Nan-Shan emphasized<break/>(鍾南山院士再次強調)</td>
                  <td>Coronavirus expert from Mainland China, 78-year-old Academician Zhong Nan-Shan emphasized<break/>(大陸，冠狀病毒專家鐘南山78歲院士再次強調)</td>
                </tr>
                <tr valign="top">
                  <td>February 27</td>
                  <td>Academician Zhong Nan-Shan emphasized<break/>(鍾南山院士再次強調)</td>
                  <td>Coronavirus expert from Mainland China, 84-year-old Academician Zhong Nan-Shan emphasized<break/>(大陸，冠狀病毒專家鐘南山84歲院士再次強調)</td>
                </tr>
                <tr valign="top">
                  <td>April 1</td>
                  <td>Academician Zhong Nan-Shan emphasized<break/>(鍾南山院士再次強調)</td>
                  <td>Minister of Taiwan’s MOHW<sup>a</sup>, Chen Shih-Chung, reminded everyone<break/>(台灣衛福部長陳時中提醒大家)</td>
                </tr>
                <tr valign="top">
                  <td>February 18</td>
                  <td>Do not go outside! At least wait until the Lantern Festival.<break/>(別出門，元宵後，再看疫情控制情況)</td>
                  <td>Do not go outside! At least wait until the Dragon Boat Festival.<break/>(別出門，端午節過後，再看疫情控制情況)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table6fn1">
                <p><sup>a</sup>MOHW: Ministry of Health and Welfare.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <p>First, the time-sensitive information in the messages evolved. In early February, most messages mentioned “Lantern Festival,” which took place on February 8, 2020. However, from February 18 onward, there were messages that replaced “Lantern Festival” with “March.” Then, after March 10, most messages included “Dragon Boat Festival,” which took place on June 25, 2020.</p>
          <p>Second, among 2119 reported messages, 2095 (98.9%) falsely quoted authority. Zhong Nan-Shan, the leader of China’s National Health Commission’s expert panel for investigating the COVID-19 outbreak in China, and Chen Shih-Chung, the director of the Central Epidemic Command Center (CECC)—the two most popular misquoted targets—showed up in 975 (46.5%) and 1117 (53.3%) messages, respectively. Efforts were made to emphasize the authoritativeness of the quoted party as well. For example, titles for Zhong Nan-Shan became longer, from “Expert in Pandemic from Mainland China” and “Expert in Coronavirus” to “Expert in Coronavirus from Mainland China, 78-year-old Academician Zhong Nan-Shan.” Starting from April 1, 2020, every reported message had Zhong replaced with Chen Shih-Chung (<xref rid="figure10" ref-type="fig">Figure 10</xref>). As the Minister of the Ministry of Health and Welfare (MOHW) and director of Taiwan’s CECC, Chen’s popularity skyrocketed during the pandemic through his daily press conferences.</p>
          <fig id="figure10" position="float">
            <label>Figure 10</label>
            <caption>
              <p>Chen Shih-Chung replaced Zhong Nan-Shan as the most quoted party in the Case 1 rumor after April 1, 2020.</p>
            </caption>
            <graphic xlink:href="medinform_v9i11e30467_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Due to the prevalence of this message spreading on the web and closed platforms, the MOHW and the CECC both sent out a press release [<xref ref-type="bibr" rid="ref31">31</xref>] on April 2, 2020, reminding the public that this was misinformation. Nevertheless, this did not stop another viral spread of the same message at the end of a 4-day long weekend holiday in Taiwan, where crowds were seen at every tourist attraction on the island. For days, people worried that the long weekend would lead to another outbreak of the pandemic, providing an explanation as to why the message bearing the key topic “do not go out” would become a big hit.</p>
        </sec>
        <sec>
          <title>Case 2: Drinking Salt Water Can Prevent the Spread of COVID-19</title>
          <p>This rumor promoted drinking salt water to prevent COVID-19. Interestingly, this rumor was actually the combination of two individual rumors (<xref ref-type="table" rid="table7">Table 7</xref>). Message B had a peak on March 27 with 265 messages, and Message A+B received the most attention on March 30 with 523 messages (<xref rid="figure11" ref-type="fig">Figure 11</xref>).</p>
          <table-wrap position="float" id="table7">
            <label>Table 7</label>
            <caption>
              <p>Content of Case 2 rumor.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="100"/>
              <col width="480"/>
              <col width="420"/>
              <thead>
                <tr valign="top">
                  <td>Message</td>
                  <td>English translation</td>
                  <td>Original content</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>A</td>
                  <td>This is 100% accurate...Why did we see a huge decline of confirmed cases in China during the last few days? They simply forced their citizens to rinse mouths with salted water three times a day and then drink water for 5 minutes. The virus would attack throats before the lungs, and when getting in touch with salted water, the virus would die or get washed down into the stomach and destroyed there. This is the only way to prevent the spread of COVID-19. There is no need to buy medicine as there is nothing effective on the market.</td>
                  <td>這是100%準確的信息... 為什麼中國過去幾天大大減少了感染人數？他們只是簡單地強迫他們的人民每天漱口3次鹽水.完成後，喝水5分鐘.因為該病毒只能在喉嚨中侵襲，然後再侵襲肺部，當受到鹽水侵襲時，該病毒會死亡或從胃中流下來並在胃中銷毀，這是預防冠狀病毒流行的唯一方法.市場上沒有藥品，所以不要購買.</td>
                </tr>
                <tr valign="top">
                  <td>B</td>
                  <td>Before reaching the lungs, the novel coronavirus would survive in throats for 4 days. At this stage, people would experience sore throats and start coughing. If one can drink as much warm water with salt and vinegar as they can, the virus could be destroyed. Share this information to save people’s lives.</td>
                  <td>新冠肺炎在還沒有來到肺部之前，它會在喉嚨部位存活4天.在這個時候,人們會開始咳嗽及喉嚨痛.如果他能儘量喝多溫開水及鹽巴或醋,就能消滅病菌.儘快把此訊息轉達一下，因爲你會救他人一命！</td>
                </tr>
                <tr valign="top">
                  <td>A+B</td>
                  <td>Why did Mainland China show a huge decline of confirmed cases over the last few days? Besides wearing masks and washing hands, they simply rinse mouths with salted water three times a day and then drink water for 5 minutes...Dr Wang of Tung Hospital stated that the novel coronavirus would survive in throats for 4 days before reaching the lungs...If one can drink as much warm water with salt and vinegar as they can, the virus could be destroyed...</td>
                  <td>為什麼中國大陸過去幾天大大減少了感染人數？除了戴口罩勤洗手外，他們只是簡單地每天漱口3次鹽水.完成後，喝水5分鐘... 新冠肺炎在還沒有來到肺部之前，它會在喉嚨部位存活4天... 如果他能儘量喝多溫開水及鹽巴或醋，就能消滅病菌...</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <fig id="figure11" position="float">
            <label>Figure 11</label>
            <caption>
              <p>The number of Case 2 (ie, "Drinking salt water can prevent the spread of COVID-19") messages reported by date. The rumor had been fact-checked rather early; however, the information still received widespread attention. Message B peaked on March 27 with 265 messages, and the combined message peaked on March 30 with 523 messages. Refer to Table 5 for major societal events. Refer to Table 7 for contents of Messages A, B, and A+B.</p>
            </caption>
            <graphic xlink:href="medinform_v9i11e30467_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Among 3283 reported messages, 3093 (94.2%) misquoted medical professionals. The most popular misquoted parties were Dr Wang of Tung Hospital and the director of the Veteran Hospital, seen in 2340 (71.3%) and 753 (22.9%) messages, respectively.</p>
          <p>Drinking salt water to prevent COVID-19 was a popular false claim about COVID-19 internationally. This rumor was fact-checked several times in March by Taiwan’s fact-checking agencies [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], and even the WHO had fact-checked a similar claim about rinsing noses with saline [<xref ref-type="bibr" rid="ref34">34</xref>]. However, this did not stop this piece of misinformation from receiving attention (<xref rid="figure11" ref-type="fig">Figure 11</xref>). In fact, several translations of the combined rumor (ie, Message A+B) were observed in April. The translations included English, Indonesian, Filipino, and Tibetan.</p>
          <p>The lifespan of this “drink salt water” rumor was rather long. One famous fact-checking platform in Taiwan, MyGoPen, released an article to disprove this false medical advice again in October 2020 [<xref ref-type="bibr" rid="ref35">35</xref>], 7 months after it was first seen in our data set.</p>
        </sec>
        <sec>
          <title>Case 3: This Is a Critical Period; Here Are Some Suggestions</title>
          <p>This rumor mentioned that Taiwan “entered a critical period of the pandemic” and provided a list of suggested measures for people to follow (<xref ref-type="boxed-text" rid="box4">Textbox 4</xref>). Some of the suggestions made sense in terms of personal hygiene, while others were without basis. This rumor first appeared in the data set on February 6 and included a total of 2121 messages. Over the 1.5 months of its most popular period, it went viral at least three times: February 10 with 120 messages, February 17 with 394 messages, and March 19 with 543 messages (<xref rid="figure12" ref-type="fig">Figure 12</xref>).</p>
          <boxed-text id="box4" position="float">
            <title>Content of the Case 3 rumor.</title>
            <p>English translation:</p>
            <p>10 days from now, Taiwan will be in a critical period to combat COVID-19. Here are some suggested measures.</p>
            <p>1. Strictly prohibit going to public places. 2. Take out from restaurants. 3. Eat outside in open spaces. 4. Wash your hands the right way (extremely important). 5. When taking the subway or bus, choose the seats at the front half of the vehicle. 6. Do not wear contact lenses. 7. Eat warm food and more vegetables. 8. Avoid constipation. 9. Drink warm water. 10. Do not visit hair salons. 11. Hang the clothes you’re wearing outside for two hours the first moment you get home. 12. Do not wear jewelry. 13. Wash your hands immediately after touching cash or coins. Put coins you just received inside a plastic bag for one day before using them. 14. Do not use a colleague’s phone when working. If you have to, disinfect before using. 15. Avoid taking public transportation during rush hour. 16. Do not visit night markets or traditional markets. 17. Exercise. 18. Avoid going to the gym.</p>
            <p>Original content:</p>
            <p>今天開始10天，台灣正式進入武漢肺炎関鍵期。建議如下: 1.嚴禁進入公共場所. 2.用餐儘量將食物外帶.3.用餐環境儘量在外. 4.正確方式的洗手(特別重要). 5.坐捷運(公車)，選擇在車前頭. 6.避免戴隱形眼鏡 7.吃熱食,避開生凉食物,多吃蔬菜 8.保持腸胃暢通. 9.多喝溫水. 10.暫停去髮廊. 11.穿過的衣服(外套,長褲),回家先單獨吊在外2小時 12.暫停戴首飾. 13. 一有接觸錢幣,一定要洗手,剛拿進來的錢幣,先單獨放在塑膠袋中,一天後,才拿出來. 14.在公司不要使用別人的電話筒.電話筒需消毒.15. 避開巔峰時間坐車. 16.不去傳統市場及夜市. 17.適當的運動.18.暫停進入健身房.</p>
          </boxed-text>
          <fig id="figure12" position="float">
            <label>Figure 12</label>
            <caption>
              <p>The number of Case 3 (ie, "This is a critical period; here are some suggestions") messages reported by date. The rumor was fact-checked several times in early February. However, higher peaks were still seen later on February 17 with 394 messages and, after a month, on March 19 with 543 messages. Refer to Table 5 for major societal events.</p>
            </caption>
            <graphic xlink:href="medinform_v9i11e30467_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Among 2121 reported messages, 1778 (83.8%) misquoted authorities as <italic>endorsing</italic> the rumor. The Taiwan Medical Association and the CECC director, Chen Shih-Chung, were the most misquoted parties, seen in 1637 (77.2%) and 393 (18.5%) messages, respectively (<xref rid="figure13" ref-type="fig">Figure 13</xref>). A major revision of the rumor appeared on February 12 (<xref ref-type="table" rid="table8">Table 8</xref>), 6 days after the first message. In the revision, the original 18 bullets were pruned to 14, removing the ones that were perhaps more ridiculous or hard to follow. Strong words were also modified to a gentler tone. The Taiwan Medical Association, the most misquoted party, also first appeared in the message in this revision.</p>
          <fig id="figure13" position="float">
            <label>Figure 13</label>
            <caption>
              <p>The Taiwan Medical Association (TMA) was quoted in almost every message in this rumor group, even though the TMA released a statement on February 12, 2020, saying that they did not endorse the material. Later, after Chen Shih-Chung went to the Legislative Yuan on March 18, the same rumor started misquoting him.</p>
            </caption>
            <graphic xlink:href="medinform_v9i11e30467_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <table-wrap position="float" id="table8">
            <label>Table 8</label>
            <caption>
              <p>Change log for the Case 3 rumor content.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="100"/>
              <col width="480"/>
              <col width="420"/>
              <thead>
                <tr valign="top">
                  <td>Date</td>
                  <td colspan="2">English translation (original)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Previous content</td>
                  <td>New content</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="2">
                    <bold>February 12, 2020</bold>
                  </td>
                  <td>
                    <break/>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Strictly prohibit going to public places. (嚴禁進入公共場所.)</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Reduce going to public places. (減少進入公共場所.)</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Eat outside in open spaces. (用餐環境儘量在外.)</p>
                      </list-item>
                      <list-item>
                        <p>When taking the subway or bus, choose the seats at the front half of the vehicle. (坐捷運(公車)，選擇在車前頭.)</p>
                      </list-item>
                      <list-item>
                        <p>Do not visit hair salons. (暫停去髮廊.)</p>
                      </list-item>
                      <list-item>
                        <p>Do not visit night markets or traditional markets. (不去傳統市場及夜市.)</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Content was deleted</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>No previous content</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Regards from the Taiwan Medical Association. (醫師全聯會關心您.)</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td colspan="2">
                    <bold>March 18, 2020</bold>
                  </td>
                  <td>
                    <break/>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>10 days from now, Taiwan will be in a critical period to combat COVID-19. Here are some suggested measures... (今天起10天，台灣正式進入武漢肺炎関鍵期，建議如下...)</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>10 days from now, Taiwan will be in a critical period to combat COVID-19 (explained by Chen Shih-Chung in the Legislative Yuan on March 18, 2020). Here are some suggested measures... (今天起10天，台灣正式進入武漢肺炎関鍵期，(3/18陳時中立法院說明) 建議如下...)</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Regards from the Taiwan Medical Association. (醫師全聯會關心您.)</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Content was deleted</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>After almost a month with only a few messages circulating (<xref rid="figure12" ref-type="fig">Figure 12</xref>), on March 18, the CECC director, Chen Shih-Chung, went to the Legislative Yuan (similar to the US Congress) for interpellation about the pandemic. Chen started to be quoted in messages on the same day, making the “suggested measures” look like they were said by Chen during his interpellation (<xref ref-type="table" rid="table8">Table 8</xref>). The next day, on March 19, the reported message count skyrocketed to the highest peak. Of the 543 messages reported on March 19, 280 (51.2%) misquoted Chen.</p>
          <p>Several fact-checking agencies published reports pointing out the falsity of the message [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>] between February 10 and 15 (<xref rid="figure12" ref-type="fig">Figure 12</xref>). The Taiwan Medical Association, which was misquoted in 1637 out of 2121 (77.2%) messages, also released a clarifying statement on February 12 [<xref ref-type="bibr" rid="ref39">39</xref>], stating explicitly that they did not endorse the material. However, similar to what we observed in the previous two cases, such fact-checking efforts did not prevent the rumor from getting widespread attention later. Rather, societal events might have played a larger role in the popularity of the rumor. For example, the spike on February 17 (<xref rid="figure12" ref-type="fig">Figure 12</xref>) was preceded by the first COVID-19 death case and a local cluster in Taiwan. A taxi driver tested positive for the virus and died the same day on February 15. Over the next few days, four of the driver’s family members also tested positive, forming the first local cluster of COVID-19 infection in Taiwan. The highest spike on March 19 was preceded by the CECC director’s interpellation in the Legislative Yuan, the event after which the messages started misquoting the director.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>First, we demonstrated that by using a combination of HAC and KNN algorithms, we could efficiently separate a large number of social media text messages into fine-grained narratives, or <italic>rumors</italic>. The addition of the KNN classification algorithm sped up the process while achieving results nearly equivalent to those obtained using HAC alone. Hence, this classification-based clustering algorithm could enable future large-scale studies of rumor transformation with social media post content.</p>
        <p>We identified 396 rumors related to COVID-19 from the pool of 114,124 suspicious messages collected from the LINE platform between January and July 2020. Among the COVID-19–related rumors, more than one-third were deemed false or misleading by IFCN-certified fact-checking agencies in Taiwan. Compared to non–COVID-19–related messages, COVID-19–related messages were more likely to contain simplified Chinese characters or phrases originating from China. The association was statistically significant. As the official language in Taiwan is traditional Chinese, the result suggested that COVID-19–related messages were more likely to have originated from non-Taiwanese users than the non–COVID-19–related messages.</p>
        <p>We qualitatively investigated three COVID-19–related rumors with the highest number of messages and observed several commonalities among these highly popular rumors. First, a significant number of messages from all three rumor groups misquoted key authoritative figures. Given the nature of the pandemic, the authorities were usually medical personnel. At times, a change in the quoted authority figures signaled a paradigm shift, indicating whom the public looked up to, for example, from Zhong Nan-Shan to Chen Shih-Chung. At other times, the quoted party did not seem to make any sense. For example, Dr Wang in Case 2 was in fact an orthopedist, whose specialty is not directly related to COVID-19. Second, in all three rumors, we observed spikes in reported messages even after several fact-checking agencies released reports that deemed the content false or misleading. Echoing the findings of Wood and Porter [<xref ref-type="bibr" rid="ref40">40</xref>], the current practice of fact-checking did not seem to effectively stop the false information from getting widespread attention later. In fact, by identifying major societal events preceding each resurfacing peak, we asserted that resurfacing patterns were more influenced by major societal events and textual transformation. However, each peak of popularity did not last long, and there was often no clear explanation for how a wave of attention ended.</p>
        <p>Our work offers several insights into the landscape of misinformation in a closed platform as well as the behaviors of some popular COVID-19 rumors. These characteristics could serve as rules to discover possible false information as early detection mechanisms. Although we identified these characteristics manually in this study, it is quite possible to employ techniques such as NLP to automatically recognize these textual changes in the future, making it possible to have an automatic early warning system of possible misinformation before fact-checking efforts by professionals.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Our work adds to the limited collection of COVID-19 infodemic studies in closed platforms [<xref ref-type="bibr" rid="ref41">41</xref>]. Compared with other rumor diffusion studies, such as the study of 17 political rumors by Shin et al [<xref ref-type="bibr" rid="ref42">42</xref>], this work provided an efficient machine learning algorithm that could enable large-scale rumor evolution studies on social media platforms in the future. In comparison to other machine learning applications in COVID-19 infodemic studies, this work focused on fine-grained narratives, or <italic>rumors</italic>, rather than high-level topics, in order to study individual rumor propagation. To the best of our knowledge, this is the first study to examine rumor diffusion and propagation patterns of COVID-19 misinformation on a closed platform.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study had several limitations. First, the data were collected by LINE users’ reports. Therefore, it was impossible to infer the true distribution of messages without making some assumptions. For example, if there was more health-related misinformation in our data, it did not necessarily translate to more health-related rumors circulating in the platform. In fact, it could also be that people were more alert and skeptical of health-related information. Second, we only looked at text messages. Therefore, information distributed visually or in audio form was not covered. Lastly, our algorithm for grouping messages does not work well with short texts.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>While social media may give rise to an unprecedented number of unverified rumors, it also provides a unique opportunity to study rumor propagation. In fact, to combat the infodemic, we need to first understand how and why some rumors become popular. In this study, we proposed an algorithm that enables the research community to perform large-scale studies on the evolution of text messages at the rumor level rather than at the topic level. Moreover, we showed textual commonalities in widespread rumors in Taiwan during COVID-19. We also showed that the attention one rumor received was connected to major societal events and content changes. To the best of our knowledge, this is one of the few studies that has examined COVID-19 misinformation on a closed messaging platform and the first to examine the textual evolution of COVID-19–related rumors during their propagation. We hope that this work will spark further studies of rumor propagation patterns as an effort to fight the infodemic.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Experiment setup for comparing algorithms.</p>
        <media xlink:href="medinform_v9i11e30467_app1.docx" xlink:title="DOCX File, 1097 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>An example message without any of the COVID-19 keywords, but that could be identified as COVID-19–related by a close neighbor.</p>
        <media xlink:href="medinform_v9i11e30467_app2.docx" xlink:title="DOCX File, 705 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CECC</term>
          <def>
            <p>Central Epidemic Command Center</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">HAC</term>
          <def>
            <p>hierarchical agglomerative clustering</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">IFCN</term>
          <def>
            <p>International Fact-Checking Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">KNN</term>
          <def>
            <p>k-nearest neighbors</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MOHW</term>
          <def>
            <p>Ministry of Health and Welfare</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">TCS</term>
          <def>
            <p>Taiwan Communication Survey</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">VADER</term>
          <def>
            <p>Valence Aware Dictionary and Sentiment Reasoner</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">XGBoost</term>
          <def>
            <p>Extreme Gradient Boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We cited a survey study where the data were collected by a research project of the TCS [<xref ref-type="bibr" rid="ref26">26</xref>]. The TCS project is supported by the Ministry of Science and Technology of the Republic of China. We appreciate the assistance by the institute in providing data. The views expressed herein are the authors’ own. This work was supported by the Information Operations Research Group, which was funded by the Institute for War and Peace Reporting and the Taiwan Foundation for Democracy. The funders played no role in the design, implementation, or reporting of this research.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Del Vicario</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bessi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zollo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Petroni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Scala</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Caldarelli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stanley</surname>
              <given-names>HE</given-names>
            </name>
            <name name-style="western">
              <surname>Quattrociocchi</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>The spreading of misinformation online</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2016</year>
          <month>01</month>
          <day>19</day>
          <volume>113</volume>
          <issue>3</issue>
          <fpage>554</fpage>
          <lpage>559</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.pnas.org/cgi/pmidlookup?view=long&#38;pmid=26729863"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1517441113</pub-id>
          <pub-id pub-id-type="medline">26729863</pub-id>
          <pub-id pub-id-type="pii">1517441113</pub-id>
          <pub-id pub-id-type="pmcid">PMC4725489</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazer</surname>
              <given-names>DMJ</given-names>
            </name>
            <name name-style="western">
              <surname>Baum</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Benkler</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Berinsky</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Greenhill</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Menczer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Metzger</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Nyhan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pennycook</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rothschild</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schudson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sloman</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Sunstein</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Thorson</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zittrain</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>The science of fake news</article-title>
          <source>Science</source>
          <year>2018</year>
          <month>03</month>
          <day>09</day>
          <volume>359</volume>
          <issue>6380</issue>
          <fpage>1094</fpage>
          <lpage>1096</lpage>
          <pub-id pub-id-type="doi">10.1126/science.aao2998</pub-id>
          <pub-id pub-id-type="medline">29590025</pub-id>
          <pub-id pub-id-type="pii">359/6380/1094</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vosoughi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Aral</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The spread of true and false news online</article-title>
          <source>Science</source>
          <year>2018</year>
          <month>03</month>
          <day>09</day>
          <volume>359</volume>
          <issue>6380</issue>
          <fpage>1146</fpage>
          <lpage>1151</lpage>
          <pub-id pub-id-type="doi">10.1126/science.aap9559</pub-id>
          <pub-id pub-id-type="medline">29590045</pub-id>
          <pub-id pub-id-type="pii">359/6380/1146</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdoli</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Gossip, rumors, and the COVID-19 crisis</article-title>
          <source>Disaster Med Public Health Prep</source>
          <year>2020</year>
          <month>08</month>
          <volume>14</volume>
          <issue>4</issue>
          <fpage>e29</fpage>
          <lpage>e30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32713376"/>
          </comment>
          <pub-id pub-id-type="doi">10.1017/dmp.2020.272</pub-id>
          <pub-id pub-id-type="medline">32713376</pub-id>
          <pub-id pub-id-type="pii">S1935789320002724</pub-id>
          <pub-id pub-id-type="pmcid">PMC7443554</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tasnim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hossain</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Mazumder</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Impact of rumors and misinformation on COVID-19 in social media</article-title>
          <source>J Prev Med Public Health</source>
          <year>2020</year>
          <month>05</month>
          <volume>53</volume>
          <issue>3</issue>
          <fpage>171</fpage>
          <lpage>174</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.3961/jpmph.20.094"/>
          </comment>
          <pub-id pub-id-type="doi">10.3961/jpmph.20.094</pub-id>
          <pub-id pub-id-type="medline">32498140</pub-id>
          <pub-id pub-id-type="pii">jpmph.20.094</pub-id>
          <pub-id pub-id-type="pmcid">PMC7280809</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <source>Novel Coronavirus (2019-nCoV): Situation Report - 13</source>
          <year>2020</year>
          <month>02</month>
          <day>02</day>
          <access-date>2021-06-01</access-date>
          <publisher-loc>Geneva, Switzerland</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/docs/default-source/coronaviruse/situation-reports/20200202-sitrep-13-ncov-v3.pdf">https://www.who.int/docs/default-source/coronaviruse/situation-reports/20200202-sitrep-13-ncov-v3.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mubeen</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Kamal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kamal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Balkhi</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Knowledge and awareness regarding spread and prevention of COVID-19 among the young adults of Karachi</article-title>
          <source>J Pak Med Assoc</source>
          <year>2020</year>
          <month>05</month>
          <volume>70(Suppl 3)</volume>
          <issue>5</issue>
          <fpage>S169</fpage>
          <lpage>S174</lpage>
          <pub-id pub-id-type="doi">10.5455/JPMA.40</pub-id>
          <pub-id pub-id-type="medline">32515406</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mat Dawi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Namazi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hwang</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ismail</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Maresova</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Krejcar</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Attitude toward protective behavior engagement during COVID-19 pandemic in Malaysia: The role of e-government and social media</article-title>
          <source>Front Public Health</source>
          <year>2021</year>
          <volume>9</volume>
          <fpage>609716</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fpubh.2021.609716"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpubh.2021.609716</pub-id>
          <pub-id pub-id-type="medline">33732677</pub-id>
          <pub-id pub-id-type="pmcid">PMC7956949</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cinelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Quattrociocchi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Galeazzi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Valensise</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Brugnoli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Zola</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zollo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Scala</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The COVID-19 social media infodemic</article-title>
          <source>Sci Rep</source>
          <year>2020</year>
          <month>10</month>
          <day>06</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>16598</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-020-73510-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-020-73510-5</pub-id>
          <pub-id pub-id-type="medline">33024152</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-020-73510-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7538912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gallotti</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Valle</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Castaldo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sacco</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>De Domenico</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Assessing the risks of 'infodemics' in response to COVID-19 epidemics</article-title>
          <source>Nat Hum Behav</source>
          <year>2020</year>
          <month>12</month>
          <volume>4</volume>
          <issue>12</issue>
          <fpage>1285</fpage>
          <lpage>1293</lpage>
          <pub-id pub-id-type="doi">10.1038/s41562-020-00994-6</pub-id>
          <pub-id pub-id-type="medline">33122812</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-020-00994-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pulido</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Villarejo-Carballido</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Redondo-Sama</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gómez</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 infodemic: More retweets for science-based information on coronavirus than for false information</article-title>
          <source>Int Sociol</source>
          <year>2020</year>
          <month>04</month>
          <day>15</day>
          <volume>35</volume>
          <issue>4</issue>
          <fpage>377</fpage>
          <lpage>392</lpage>
          <pub-id pub-id-type="doi">10.1177/0268580920914755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abd-Alrazaq</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alhuwail</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Househ</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hamdi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Top concerns of tweeters during the COVID-19 pandemic: Infoveillance study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>04</month>
          <day>21</day>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>e19016</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/4/e19016/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19016</pub-id>
          <pub-id pub-id-type="medline">32287039</pub-id>
          <pub-id pub-id-type="pii">v22i4e19016</pub-id>
          <pub-id pub-id-type="pmcid">PMC7175788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Rakhami</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Amri</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Lies kill, facts save: Detecting COVID-19 misinformation in Twitter</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>155961</fpage>
          <lpage>155970</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3019600</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsudias</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rayson</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 and Arabic Twitter: How can Arab world governments and public health organizations learn from social media?</article-title>
          <source>Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020</source>
          <year>2020</year>
          <conf-name>1st Workshop on NLP for COVID-19 at ACL 2020</conf-name>
          <conf-date>July 9-10, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.nlpcovid19-acl.16.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chandrasekaran</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Valkunde</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Moustakas</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Topics, trends, and sentiments of tweets about the COVID-19 pandemic: Temporal infoveillance study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>23</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e22624</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e22624/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22624</pub-id>
          <pub-id pub-id-type="medline">33006937</pub-id>
          <pub-id pub-id-type="pii">v22i10e22624</pub-id>
          <pub-id pub-id-type="pmcid">PMC7588259</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Janies</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A novel machine learning framework for comparison of viral COVID-19-related Sina Weibo and Twitter posts: Workflow development and content analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>01</month>
          <day>06</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e24889</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e24889/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24889</pub-id>
          <pub-id pub-id-type="medline">33326408</pub-id>
          <pub-id pub-id-type="pii">v23i1e24889</pub-id>
          <pub-id pub-id-type="pmcid">PMC7790734</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hung</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lauren</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hon</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Birmingham</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hon</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lipsky</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Social network analysis of COVID-19 sentiments: Application of artificial intelligence</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>08</month>
          <day>18</day>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>e22590</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/8/e22590/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22590</pub-id>
          <pub-id pub-id-type="medline">32750001</pub-id>
          <pub-id pub-id-type="pii">v22i8e22590</pub-id>
          <pub-id pub-id-type="pmcid">PMC7438102</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imran</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Daudpota</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Kastrati</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Batra</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Cross-cultural polarity and emotion detection using sentiment analysis and deep learning on COVID-19 related tweets</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>181074</fpage>
          <lpage>181090</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3027350</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jelodar</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Orji</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Deep sentiment classification and topic discovery on novel coronavirus or COVID-19 online discussions: NLP using LSTM recurrent neural network approach</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2020</year>
          <month>10</month>
          <volume>24</volume>
          <issue>10</issue>
          <fpage>2733</fpage>
          <lpage>2742</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2020.3001216</pub-id>
          <pub-id pub-id-type="medline">32750931</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jo</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Online information exchange and anxiety spread in the early stage of the novel coronavirus (COVID-19) outbreak in South Korea: Structural topic model and network analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>02</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e19455</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e19455/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19455</pub-id>
          <pub-id pub-id-type="medline">32463367</pub-id>
          <pub-id pub-id-type="pii">v22i6e19455</pub-id>
          <pub-id pub-id-type="pmcid">PMC7268668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kwok</surname>
              <given-names>SWH</given-names>
            </name>
            <name name-style="western">
              <surname>Vadde</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Tweet topics and sentiments relating to COVID-19 vaccination among Australian Twitter users: Machine learning analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>05</month>
          <day>19</day>
          <volume>23</volume>
          <issue>5</issue>
          <fpage>e26953</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/5/e26953/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26953</pub-id>
          <pub-id pub-id-type="medline">33886492</pub-id>
          <pub-id pub-id-type="pii">v23i5e26953</pub-id>
          <pub-id pub-id-type="pmcid">PMC8136408</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Satu</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Mahmud</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Uddin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Summers</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Moni</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>TClustVID: A novel machine learning classification model to investigate topics and sentiment in COVID-19 tweets</article-title>
          <source>Knowl Based Syst</source>
          <year>2021</year>
          <month>08</month>
          <day>17</day>
          <volume>226</volume>
          <fpage>107126</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33972817"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.knosys.2021.107126</pub-id>
          <pub-id pub-id-type="medline">33972817</pub-id>
          <pub-id pub-id-type="pii">S0950-7051(21)00389-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC8099549</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Qu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Rumor detection of COVID-19 pandemic on online social networks</article-title>
          <source>Proceedings of the IEEE/ACM Symposium on Edge Computing</source>
          <year>2020</year>
          <conf-name>IEEE/ACM Symposium on Edge Computing</conf-name>
          <conf-date>November 12-14, 2020</conf-date>
          <conf-loc>San Jose, CA</conf-loc>
          <fpage>376</fpage>
          <lpage>381</lpage>
          <pub-id pub-id-type="doi">10.1109/SEC50012.2020.00055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Twitter discussions and emotions about the COVID-19 pandemic: Machine learning approach</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>11</month>
          <day>25</day>
          <volume>22</volume>
          <issue>11</issue>
          <fpage>e20550</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/11/e20550/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20550</pub-id>
          <pub-id pub-id-type="medline">33119535</pub-id>
          <pub-id pub-id-type="pii">v22i11e20550</pub-id>
          <pub-id pub-id-type="pmcid">PMC7690968</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Understanding concerns, sentiments, and disparities among population groups during the COVID-19 pandemic via Twitter data mining: Large-scale cross-sectional study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>03</month>
          <day>05</day>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>e26482</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/3/e26482/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26482</pub-id>
          <pub-id pub-id-type="medline">33617460</pub-id>
          <pub-id pub-id-type="pii">v23i3e26482</pub-id>
          <pub-id pub-id-type="pmcid">PMC7939057</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <source>The 2018 Taiwan Communication Survey (Phase Two, Year Two): Media Use and Social Implications</source>
          <year>2020</year>
          <access-date>2021-11-15</access-date>
          <publisher-loc>Taipei, Taiwan</publisher-loc>
          <publisher-name>Taiwan Communication Survey</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://srda.sinica.edu.tw/datasearch_detail.php?id=3053">https://srda.sinica.edu.tw/datasearch_detail.php?id=3053</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Jieba</article-title>
          <source>GitHub</source>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/fxsjy/jieba">https://github.com/fxsjy/jieba</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: Machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rehurek</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sojka</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Software framework for topic modelling with large corpora</article-title>
          <source>Proceedings of the LREC Workshop on New Challenges for NLP Frameworks</source>
          <year>2010</year>
          <conf-name>LREC Workshop on New Challenges for NLP Frameworks</conf-name>
          <conf-date>May 22, 2010</conf-date>
          <conf-loc>Valletta, Malta</conf-loc>
          <fpage>46</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://is.muni.cz/publication/884893/lrec2010-rehurek-sojka.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.13140/2.1.2393.1847</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <article-title>LINE-rumor-clustering</article-title>
          <source>GitHub</source>
          <access-date>2021-10-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/iorg-tw/LINE-rumor-clustering">https://github.com/iorg-tw/LINE-rumor-clustering</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>"Director Chen said do not go out before Dragon Boat Festival" is false information</article-title>
          <source>Ministry of Health and Welfare</source>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mohw.gov.tw/cp-4633-52577-1.html">https://www.mohw.gov.tw/cp-4633-52577-1.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <article-title>"Coronavirus will stay in your throat for four days" is a false image and a false rumor!</article-title>
          <source>MyGoPen</source>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mygopen.com/2020/03/gargling-eliminate-coronavirus.html">https://www.mygopen.com/2020/03/gargling-eliminate-coronavirus.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>"Drinking warm water with salt and vinegar could eradicate coronavirus" is false information</article-title>
          <source>Taiwan FactCheck Center</source>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tfc-taiwan.org.tw/articles/3207">https://tfc-taiwan.org.tw/articles/3207</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <article-title>Coronavirus disease (COVID-19) advice for the public: Mythbusters. FACT: Rinsing your nose with saline does NOT prevent COVID-19</article-title>
          <source>World Health Organization</source>
          <year>2021</year>
          <month>05</month>
          <day>05</day>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/emergencies/diseases/novel-coronavirus-2019/advice-for-public/myth-busters#saline">https://www.who.int/emergencies/diseases/novel-coronavirus-2019/advice-for-public/myth-busters#saline</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <article-title>"Drink salt water to prevent coronavirus" is a misleading false rumor</article-title>
          <source>MyGoPen</source>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mygopen.com/2020/10/salt-water.html">https://www.mygopen.com/2020/10/salt-water.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <article-title>"10 days from today, Taiwan enters the critical period of COVID-19" is misleading information</article-title>
          <source>MyGoPen</source>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mygopen.com/2020/02/10-key.html">https://www.mygopen.com/2020/02/10-key.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>Rumor has it that "10 days from today, Taiwan will enter the critical period of COVID-19", so what date is today?</article-title>
          <source>Rumor &amp; Truth</source>
          <year>2020</year>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rumtoast.com/12842">https://www.rumtoast.com/12842</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <article-title>Clarifying "From Taiwan Medical Association, 10 days from today, Taiwan will enter the critical period of COVID-19"</article-title>
          <source>Taiwan FactCheck Center</source>
          <year>2020</year>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tfc-taiwan.org.tw/articles/2547">https://tfc-taiwan.org.tw/articles/2547</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <article-title>COVID-19: Clarification statement of the All-Union Federation</article-title>
          <source>Taiwan Medical Association</source>
          <year>2020</year>
          <month>02</month>
          <day>12</day>
          <access-date>2021-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tma.tw/meeting/meeting_info04.asp?/9112.html">https://www.tma.tw/meeting/meeting_info04.asp?/9112.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Porter</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>The elusive backfire effect: Mass attitudes’ steadfast factual adherence</article-title>
          <source>Polit Behav</source>
          <year>2018</year>
          <month>1</month>
          <day>16</day>
          <volume>41</volume>
          <issue>1</issue>
          <fpage>135</fpage>
          <lpage>163</lpage>
          <pub-id pub-id-type="doi">10.1007/s11109-018-9443-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>LHX</given-names>
            </name>
            <name name-style="western">
              <surname>Loke</surname>
              <given-names>JY</given-names>
            </name>
          </person-group>
          <article-title>Analyzing public opinion and misinformation in a COVID-19 Telegram group chat</article-title>
          <source>IEEE Internet Comput</source>
          <year>2021</year>
          <month>3</month>
          <day>1</day>
          <volume>25</volume>
          <issue>2</issue>
          <fpage>84</fpage>
          <lpage>91</lpage>
          <pub-id pub-id-type="doi">10.1109/mic.2020.3040516</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Driscoll</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bar</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>The diffusion of misinformation on social media: Temporal pattern, message, and source</article-title>
          <source>Comput Human Behav</source>
          <year>2018</year>
          <month>06</month>
          <volume>83</volume>
          <fpage>278</fpage>
          <lpage>287</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2018.02.008</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
