<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i7e37201</article-id>
      <article-id pub-id-type="pmid">35852829</article-id>
      <article-id pub-id-type="doi">10.2196/37201</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Extraction of Explicit and Implicit Cause-Effect Relationships in Patient-Reported Diabetes-Related Tweets From 2017 to 2021: Deep Learning Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hulman</surname>
            <given-names>Adam</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pollack</surname>
            <given-names>Catherine</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Ahne</surname>
            <given-names>Adrian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Center of Epidemiology and Population Health</institution>
            <institution>Inserm, Hospital Gustave Roussy</institution>
            <institution>Paris-Saclay University</institution>
            <addr-line>20 Rue du Dr Pinel</addr-line>
            <addr-line>Villejuif, 94800</addr-line>
            <country>France</country>
            <phone>33 142115386</phone>
            <email>adrian.ahne@protonmail.com</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9463-9064</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Khetan</surname>
            <given-names>Vivek</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4394-4859</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Tannier</surname>
            <given-names>Xavier</given-names>
          </name>
          <degrees>Prof Dr</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2452-8868</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Rizvi</surname>
            <given-names>Md Imbesat Hassan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0150-8677</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Czernichow</surname>
            <given-names>Thomas</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8918-6352</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Orchard</surname>
            <given-names>Francisco</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5793-3301</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Bour</surname>
            <given-names>Charline</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8537-2097</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Fano</surname>
            <given-names>Andrew</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Fagherazzi</surname>
            <given-names>Guy</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5033-5966</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Center of Epidemiology and Population Health</institution>
        <institution>Inserm, Hospital Gustave Roussy</institution>
        <institution>Paris-Saclay University</institution>
        <addr-line>Villejuif</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Epiconcept Company</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Accenture Labs</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Laboratoire d'Informatique Médicale et d'Ingénierie des Connaissances pour la e-Santé</institution>
        <institution>Inserm, University Sorbonne Paris Nord</institution>
        <institution>Sorbonne University</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Indian Institute of Science</institution>
        <addr-line>Bengaluru</addr-line>
        <country>India</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Deep Digital Phenotyping Research Unit</institution>
        <institution>Department of Precision Health</institution>
        <institution>Luxembourg Institute of Health</institution>
        <addr-line>Strassen</addr-line>
        <country>Luxembourg</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Adrian Ahne <email>adrian.ahne@protonmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>19</day>
        <month>7</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>7</issue>
      <elocation-id>e37201</elocation-id>
      <history>
        <date date-type="received">
          <day>10</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>21</day>
          <month>4</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>17</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>4</day>
          <month>6</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Adrian Ahne, Vivek Khetan, Xavier Tannier, Md Imbesat Hassan Rizvi, Thomas Czernichow, Francisco Orchard, Charline Bour, Andrew Fano, Guy Fagherazzi. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 19.07.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/7/e37201" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Intervening in and preventing diabetes distress requires an understanding of its causes, particularly from a patient’s perspective. Social media data provide direct access to how patients see and understand their disease and consequently show the causes of diabetes distress.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Leveraging machine learning methods, we aim to extract both explicit and implicit cause-effect relationships in patient-reported diabetes-related tweets and provide a methodology to better understand the opinions, feelings, and observations shared within the diabetes online community from a causality perspective.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>More than 30 million diabetes-related tweets in English were collected between April 2017 and January 2021. Deep learning and natural language processing methods were applied to focus on tweets with personal and emotional content. A cause-effect tweet data set was manually labeled and used to train (1) a fine-tuned BERTweet model to detect causal sentences containing a causal relation and (2) a conditional random field model with Bidirectional Encoder Representations from Transformers (BERT)-based features to extract possible cause-effect associations. Causes and effects were clustered in a semisupervised approach and visualized in an interactive cause-effect network.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Causal sentences were detected with a recall of 68% in an imbalanced data set. A conditional random field model with BERT-based features outperformed a fine-tuned BERT model for cause-effect detection with a macro recall of 68%. This led to 96,676 sentences with cause-effect relationships. “Diabetes” was identified as the central cluster followed by “death” and “insulin.” Insulin pricing–related causes were frequently associated with death.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>A novel methodology was developed to detect causal sentences and identify both explicit and implicit, single and multiword causes and their corresponding effects, as expressed in diabetes-related tweets, leveraging BERT-based architectures and visualized as a cause-effect network. Extracting causal associations in real-life, patient-reported outcomes in social media data provides a useful complementary source of information in diabetes research.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>causality</kwd>
        <kwd>deep learning</kwd>
        <kwd>natural language processing</kwd>
        <kwd>diabetes</kwd>
        <kwd>social media</kwd>
        <kwd>causal relation extraction</kwd>
        <kwd>social media data</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Diabetes distress refers to psychological factors such as emotional burden, worries, frustration, or stress in the day-to-day management of all types of diabetes [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. Diabetes distress is associated with poor quality of life [<xref ref-type="bibr" rid="ref4">4</xref>], high hemoglobin A<sub>1c</sub> levels [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], and low medication adherence [<xref ref-type="bibr" rid="ref7">7</xref>]. Reducing diabetes distress may improve hemoglobin A<sub>1c</sub> levels and reduce the burden of disease among people with diabetes [<xref ref-type="bibr" rid="ref8">8</xref>]. Social media is a useful observatory resource for patient-reported diabetes issues and could help to contribute directly to public and clinical decision-making from a patient’s perspective, given the active online diabetes community [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Identifying causal relations in expressed text data in social media platforms might help to discover unknown etiological results, specifically, causes of health problems, concerns, and symptoms.</p>
      <p>To intervene and potentially prevent diabetes distress, it is necessary to understand the causes of diabetes distress from a patient’s perspective to understand how patients see their disease. Causal relation extraction in natural language text has gained popularity in clinical decision-making, biomedical knowledge discovery, or emergency management [<xref ref-type="bibr" rid="ref11">11</xref>]. In particular, causal relations on Twitter have been examined for diverse factors causing stress and relaxation [<xref ref-type="bibr" rid="ref12">12</xref>], adverse drug reactions [<xref ref-type="bibr" rid="ref13">13</xref>], or causal associations related to insomnia or headache [<xref ref-type="bibr" rid="ref14">14</xref>]. Most approaches examine <italic>explicit</italic> causality in text [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>], when cause and effect are explicitly stated, for instance, by connective words (eg, so, hence, because, lead to, since, if-then) [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. An example for an <italic>explicit</italic> cause-effect pair is “diabetes causes hypoglycemia.” However, <italic>implicit</italic> causality is more complicated to detect such as in “I reversed diabetes with lifestyle changes” with cause “lifestyle changes” and effect “reversed diabetes.”</p>
      <p>Natural language processing methods explore among other things how computers can be used to extract useful information from natural language documents. In combination with machine learning and deep learning models, which are artificial intelligence algorithms designed to learn from experience, they have also been applied to extract causal relations [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Machine learning methods are able to explore implicit relations and provide better generalization contrary to rule-based approaches [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. An interesting approach leveraging the transfer learning paradigm and addressing both explicit and implicit cause-effect extraction is provided by Khetan et al [<xref ref-type="bibr" rid="ref23">23</xref>]. They fine-tuned pretrained transformer-based Bidirectional Encoder Representations from Transformers (BERT) language models [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>] to detect “cause-effect” relationships by using publicly available data sets such as the adverse drug effect data set [<xref ref-type="bibr" rid="ref26">26</xref>]. More generally, the idea of transfer learning is to leverage the knowledge of a model that has been trained on an auxiliary domain [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
      <p>In this study, we aimed to extract spans of text as 2 distinct events from diabetes and diabetes-related tweets such that one event directly (explicit) or indirectly (implicit) impacts another event. We categorized these events as cause-event and effect-event depending upon the expressed context of each tweet. The identified cause and effect will then be aggregated into clusters and ultimately visualized in an interactive cause-effect network.</p>
      <p>This work is realized in the frame of the World Diabetes Distress Study, which aims to analyze what is shared on social media worldwide to better understand what people with diabetes and diabetes distress are experiencing [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. The social network “Twitter” is a popular data resource among diabetes researchers owing to its public character and its active online diabetes community compared to other social media [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Recent studies suggest an overrepresentation of people with type 1 diabetes compared to those with type 2 diabetes who are active on Twitter [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>On the basis of diabetes-related tweets, we first preprocessed tweets to only focus on personal, nonjoke, and emotional content. Second, after this preprocessing step, we split tweets into sentences for our analyses, as we aimed to identify the cause-effect relationships between events within a sentence (sentence level) and not across multiple sentences (tweet level). This also simplifies model training and helps with easier learning. Third, we identified sentences in which causal information (opinion, observation, etc) is communicated. In the fourth step, causes and their corresponding effects were extracted. Lastly, those cause-effect pairs were aggregated, described, and visualized. The entire workflow is illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Workflow. The steps shown in green include machine learning methods. CRF: conditional random field.</p>
          </caption>
          <graphic xlink:href="medinform_v10i7e37201_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Collection and Ethical Considerations</title>
        <p>Via Twitter’s streaming application programming interface, 32 million diabetes-related tweets in English were collected between April 2017 and January 2021 based on a list of diabetes-related keywords such as <italic>diabetes, hypoglycemia, hyperglycemia,</italic> and <italic>insulin</italic> from all over the world (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the full list of keywords used). This is an extended version of the data set used in earlier works [<xref ref-type="bibr" rid="ref9">9</xref>]. All data collected in this study were publicly posted on Twitter. Therefore, according to the privacy policy of Twitter, users agree to have this information available to the general public [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <p>Tweets are noisy and unstructured. They contain many misspelled or nonstandard English words. To reduce noise in the data set, we applied a preprocessing pipeline similar to that in earlier works, the details of which are summarized in <xref rid="figure1" ref-type="fig">Figure 1</xref> [<xref ref-type="bibr" rid="ref9">9</xref>]. First, retweets and duplicates were removed to obtain a database with 7.7 million unique tweets. Second, we retained only tweets with <italic>personal</italic> content where feelings, emotions, and opinions could be shared by people with or talking about diabetes and excluded <italic>institutional</italic> tweets referring to commercial, news, or health information. To identify <italic>personal</italic> content in tweets, we leveraged the transfer learning paradigm and fine-tuned the already pretrained transformer-based language model <italic>BERTweet</italic>, which was pretrained on 850 million English tweets (16 billion word tokens ~ 80 GB) [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. To use the model and fine-tune it for a binary sentence classification, a linear layer was added on top of the last transformer layer of the <italic>BERTweet</italic> model by using the <italic>transformers</italic> package of HuggingFace [<xref ref-type="bibr" rid="ref33">33</xref>]. The model was then fine-tuned with an extended version of the data set used in earlier works, leading to a total of 4303 tweets (1539 <italic>personal</italic> and 2764 <italic>institutional</italic>) to account for a possible temporal divergence of the way people tweet [<xref ref-type="bibr" rid="ref9">9</xref>]. The model performance to identify tweets with personal content had an accuracy of 91.2%, a precision of 86.2%, a recall of 90.9%, and an F1 score of 88.5%. The trained model was then applied to all unique tweets, resulting in a total of 2.5 million tweets with personal content. Moreover, jokes around diabetes are common on Twitter and were considered out of scope for this study as well. Similar to the <italic>personal</italic> content classifier, <italic>BERTweet</italic> was fine-tuned to detect if a tweet is a joke. For this purpose, a joke tweet data set from earlier works was extended to 1648 tweets (486 jokes, 1162 nonjokes) [<xref ref-type="bibr" rid="ref9">9</xref>]. The performance to identify if a tweet is a joke had an accuracy of 90.4%, a precision of 78.5%, a recall of 90.8%, and an F1 score of 84.2%. Applying the joke classifier on all tweets with personal content led to a data set of 1.8 million personal nonjoke tweets.</p>
        <p>A particular focus of this study was on studying diabetes distress and thus, the psychological factors and emotions. To capture these factors in tweets, only tweets containing an emotional element such as emojis/emoticons or emotional words were kept. Emotional words were identified based on a combination of the psychologist Parrot’s hierarchical classification of emotions with the 6 primary emotions (<italic>joy, love, surprise, sadness, anger, fear</italic>) and emotional words present in common questionnaires to study diabetes distress such as the Problem Areas in Diabetes scale and Diabetes Distress Scale [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. This led to 562,013 tweets containing personal, nonjoke, and emotional content. More details on the preprocessing pipeline are summarized in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref40">40</xref>].</p>
      </sec>
      <sec>
        <title>Data Annotation</title>
        <p>In order to identify causal sentences and <italic>cause-effect</italic> association, 5000 randomly chosen diabetes-related tweets were selected, preprocessed, split into sentences, and then manually labeled. We did not restrict ourselves to a specific area of diabetes-related causal relationships, and we included potentially all types. <xref ref-type="table" rid="table1">Table 1</xref> illustrates some example sentences. Only causal relationships related to diabetes were labeled as positive samples, whereas non–diabetes-related or unclear cause-effect relationships were labeled as negative samples. For a more detailed explanation on the annotation, please refer to our annotation guidelines in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Sample sentences in different label scenarios. The examples are fictitious to ensure privacy.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="100"/>
            <col width="180"/>
            <col width="140"/>
            <col width="280"/>
            <thead>
              <tr valign="top">
                <td>Sentences</td>
                <td>Cause</td>
                <td>Effect</td>
                <td>Causal association</td>
                <td>Explanation</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Diabetes causes me to have mood swings</td>
                <td>Diabetes</td>
                <td>mood swings</td>
                <td>1</td>
                <td>Possible causal association</td>
              </tr>
              <tr valign="top">
                <td>I just want to eat, I hate #diabetes</td>
                <td>#diabetes</td>
                <td>hate</td>
                <td>1</td>
                <td>Possible causal association related to diabetes distress</td>
              </tr>
              <tr valign="top">
                <td>Scary, have a diabetic daughter but I read thousands of people a year die in the United Kingdom just from flu so why panic over corona.</td>
                <td>—<sup>a</sup></td>
                <td>—</td>
                <td>0</td>
                <td>Nondiabetes or diabetes distress–related relationship. “Flu” is not diabetes-related</td>
              </tr>
              <tr valign="top">
                <td>Had two strokes and recover now and also have high blood pressure and diabetes. <inline-graphic xlink:href="medinform_v10i7e37201_fig9.png" xlink:type="simple" mimetype="image"/></td>
                <td>—</td>
                <td>—</td>
                <td>0</td>
                <td>Unclear cause-effect relationship. Not clear if “high blood pressure” or “diabetes” caused the stroke</td>
              </tr>
              <tr valign="top">
                <td>Not sure if I've been up since 3:30 to watch Titanic or because of my anxiety over my glucose test is what keeps me up <inline-graphic xlink:href="medinform_v10i7e37201_fig6.png" xlink:type="simple" mimetype="image"/></td>
                <td>glucose test</td>
                <td>anxiety</td>
                <td>1</td>
                <td>Chaining cause-effect relationship <break/> 
                (A-&#62;B-&#62;C)  <break/>  
            Event A: glucose test  <break/>  
            Event B: anxiety  <break/>  
            Event C: been up since 3:30  <break/>  
            =&#62; label the relationship which is closest to our study objective: diabetes and diabetes distress</td>
              </tr>
              <tr valign="top">
                <td>My 14-year-old daughter is type 1 = malfunctioning pancreas, meaning not enough insulin being made to regulate <inline-graphic xlink:href="medinform_v10i7e37201_fig7.png" xlink:type="simple" mimetype="image"/></td>
                <td>type 1</td>
                <td>malfunctioning pancreas; not enough insulin</td>
                <td>1</td>
                <td>Negation in a cause/effect is considered being part of the cause/effect as it does not alter the meaning</td>
              </tr>
              <tr valign="top">
                <td>It is not true to think that insulin makes you feel so bad <inline-graphic xlink:href="medinform_v10i7e37201_fig8.png" xlink:type="simple" mimetype="image"/></td>
                <td>insulin</td>
                <td>feel so bad</td>
                <td>0</td>
                <td>Negation is not part of cause/effect and alters the meaning</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Not available.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Labeling cause-effect pairs is a complex task. To verify the reliability of the labeling, 2 authors labeled 500 sentences independently and we calculated the Cohen κ score, a statistical measure expressing the level of agreement between 2 annotators [<xref ref-type="bibr" rid="ref41">41</xref>]. We obtained a score of 0.83, which is interpreted as an <italic>almost perfect</italic> agreement according to Altman [<xref ref-type="bibr" rid="ref42">42</xref>] and Landis and Koch [<xref ref-type="bibr" rid="ref43">43</xref>]. Disagreements were discussed between 2 authors, and 1 author labeled the other samples, resulting in 8235 labeled sentences (7218 noncausal sentences and 1017 causal sentences) from 5000 tweets.</p>
      </sec>
      <sec>
        <title>Models</title>
        <p>The first model was trained to predict if a sentence contains a potential cause-effect association (causal sentence), and the second model extracted the specific cause and the associated effect from the causal sentence. Thus, the first model acts like a barrier and filters noncausal sentences out. These noncausal sentences may contain a cause, an effect, or neither, but not both. To simplify the model training, we hypothesized that cause-effect pairs only occur in the same sentence and we removed all sentences with fewer than 6 words owing to a lack of context. For this reason, we operated at the sentence level and not at the tweet level. Additional challenges in our setting were that <italic>causes</italic> and <italic>effects</italic> could be multiword entities and the language used on Twitter is nonstandard with frequent slang and misspelled words.</p>
      </sec>
      <sec>
        <title>Causal Sentence Detection</title>
        <p>The identification of causal sentences is a binary classification task. The pretrained language model <italic>BERTweet</italic> served as a foundation for the model architecture capable of handling the nonstandard nature of Twitter data [<xref ref-type="bibr" rid="ref32">32</xref>]. A feed-forward network was built on top of the <italic>BERTweet</italic> [<xref ref-type="bibr" rid="ref32">32</xref>] architecture consisting of 2 fully connected layers with dropout layers with a probability of 0.3, finalized by a softmax layer, which translates the model predictions into probabilities (<xref rid="figure2" ref-type="fig">Figure 2</xref>). To adjust for the class imbalance in the labeled data, class weights were included as parameters in the categorical cross-entropy loss function to penalize mispredictions for causal sentences strongly. Initially, the labeled data were stratified, and 10% of them were kept as a test set. The remaining 90% of the samples were further separated into training and validation sets with an 80:20 split.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Model architecture for causal sentence detection. FCLL: fully connected linear layer; p: probability of an element to be zeroed.</p>
          </caption>
          <graphic xlink:href="medinform_v10i7e37201_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Augmentation Through Active Learning</title>
        <p>Data imbalance on the one hand and the limited number of positive training examples for each cause-effect pair on the other hand (as causes and effects could potentially be related to any concept in the diabetes domain) drove us to adopt an active learning approach to increase the training data. Active learning is a sample selection approach aiming to minimize the annotation cost while maximizing the performance of machine learning–based models [<xref ref-type="bibr" rid="ref44">44</xref>]. It has been widely applied on textual data [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. The training data were increased in several iterations, as illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        <p>The first iteration started by training the causal sentence classifier on sentences from the 5000 tweets. The trained classifier was then applied on 2000 randomly selected unlabeled tweets, which were preprocessed and split into sentences, resulting in a set of causal sentences and a set of noncausal sentences. The sentences predicted as causal sentences were examined manually, and possible misclassifications were corrected to ensure clean positive training samples. The noncausal sentence set remained untouched. As a consequence, potential misclassifications remained in the noncausal sentence set, which should then be considered noisy. The causal and noncausal sentence sets were then combined and added as new training data to the already labeled data, leading to an updated training set of 7000 tweets. This process was iterated 4 times and allowed us to augment the labeled data much faster and more efficiently than would have been possible without active learning, as it enabled us to focus on the few positive samples. The final training set was used to train the classification model and the cause-effect extraction model.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Active learning loop to augment the training set in a time-efficient fashion.</p>
          </caption>
          <graphic xlink:href="medinform_v10i7e37201_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Cause-Effect Pairs</title>
        <p>After having trained the causal sentence classifier to detect sentences with causal information, we identified the specific cause-effect pairs in the causal sentences. The identification of cause-effect pairs was cast as an event extraction or named-entity recognition task, that is, assigning a label cause or effect to a sequence of words. The manually labeled causes and effects were encoded in an IO tagging format based on the common tagging format BIO (Beginning, Inside, Outside), introduced by Ramshaw and Marcus [<xref ref-type="bibr" rid="ref47">47</xref>]. Here, “I-C” denotes inside the cause and “I-E” inside the effect. Those 2 tags were completed by the outside tag “O,” symbolizing that the word is neither cause nor effect. The IO tagging scheme for the example sentence with cause “prediabetes” and effect “change my lifestyle” is summarized:</p>
        <p>Sentence: Prediabetes, forces, me, to, change, my, lifestyle</p>
        <p>IO tags: I-C, O, O, O, I-E, I-E, I-E</p>
        <p>Note that a word can be either cause or effect depending on the context. For instance, “prediabetes” in “Prediabetes forces me to change my lifestyle” takes the role of a cause, whereas in “Limited exercising may lead to prediabetes,” it is a possible effect. IO tagging was preferred over BIO tagging to simplify the model learning by reducing the number of classes from 5 to 3. Moreover, the task is complex and considered open domain, as causes and effects are not restricted to 1 specific topic but can be related to any concept in our target domain (diabetes). As a consequence, the creation of a representative training set is challenging, as most cause-effect pairs occur rarely. This complexity drove us to test several model architectures; refer to <xref rid="figure4" ref-type="fig">Figure 4</xref> for an overview.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Model architectures of cause-effect identification. CRF: conditional random field; FCLL: fully connected linear layer; p: probability of an element to be zeroed.</p>
          </caption>
          <graphic xlink:href="medinform_v10i7e37201_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <list list-type="order">
          <list-item>
            <p>BERT_FFL: Pretrained BERTweet language model and on top, 2 feed-forward layers with a dropout of 0.3, followed by a softmax layer. For the model training, the cross-entropy loss function is selected and weighted by the class weights to penalize mispredictions for causes and effects more strongly.</p>
          </list-item>
          <list-item>
            <p>WE_BERT_CRF: Single conditional random field (CRF) layer with BERTweet embeddings as features augmented by discrete features such as if the word is lowercase, digit, or the word length. CRFs are a standard statistical sequential classification method to identify entities in a text [<xref ref-type="bibr" rid="ref48">48</xref>]. The CRF function is implemented with the Python package sklearn-crfsuite [<xref ref-type="bibr" rid="ref49">49</xref>] based on CRFsuite [<xref ref-type="bibr" rid="ref50">50</xref>]. As parameters for the CRF function, the default algorithm “Gradient descent using the Limited Memory Broyden-Fletcher-Goldfarb-Shanno method” was chosen, and the coefficients for L1 and L2 regularization were both 0.1.</p>
          </list-item>
          <list-item>
            <p>FastText_CRF: Similar to WE_BERT_CRF, with the difference that BERTweet embeddings were replaced by FastText embeddings in the feature vector for each word. The FastText vectors were trained on similar diabetes-related tweets and were therefore well adapted to our use case [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Clustering of Causes and Effects</title>
        <p>A large part of <italic>causes</italic> and <italic>effects</italic> can be regrouped into similar concepts (clusters) to facilitate analyses and allow effective network analyses. We chose a semisupervised, time-efficient approach in which 1000 <italic>causes</italic> and 1000 <italic>effects</italic> were randomly chosen and 2 researchers manually grouped these into clusters such as “diabetes,” “death,” “family,” and “fear,” hereinafter referred to as “parent clusters” to simplify understanding. The remaining <italic>causes</italic> and <italic>effects</italic> were then automatically compared to each element of all the clusters based on <italic>FastText</italic> vectors and cosine similarity and associated with the cluster containing the most similar element. Experimentally, a similarity threshold of 0.55 was determined; if a cause/effect had a similarity smaller than this threshold for all elements, a new cluster was created for this cause/effect. These clusters were also visualized in an interactive cause-effect network, developed in D3, to enable further exploration of the cause-effect association about diabetes distress communication in social media. Python (version 3.8.8) and the deep learning framework PyTorch (version 1.8.1) were used to implement the abovementioned methods. The algorithms are open sourced under [<xref ref-type="bibr" rid="ref51">51</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The following results were obtained from 482,583 sentences, which were obtained from splitting the 562,013 personal, emotional, and nonjoke tweets into sentences, excluding questions and including only sentences with more than 5 words.</p>
      <sec>
        <title>Model Training and Performance</title>
        <sec>
          <title>Causal Sentences</title>
          <p>Hyperparameters for the model training were optimized, and the best model was trained with an Adam optimizer with a learning rate of 1e-3 among [1e-2, 1e-3, 1e-4] and a scheduler with linearly decreasing learning rate with 0 warmup steps. The optimal batch size was obtained for 16 among [8, 16, 32], and we trained for 35 epochs with early stopping. The performances to detect causal sentences for the imbalanced data set are illustrated in <xref ref-type="table" rid="table2">Table 2</xref> for each round of the active learning loop, with each round having been trained on more data. The highest accuracy was reached in round 4 with 71%. We applied the model of round 4 on all the remaining tweets, as it was trained on the largest training data set, including difficult causal examples missed by earlier models and is thus better at identifying complex causal sentences. The active learning strategy led us to increase the training data much more quickly than would have been possible without active learning and without loss in performance. This led to a clean database of 265,328 causal sentences with the most noisy sentences removed.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Performance measures (macro) for each round of more training data.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="70"/>
              <col width="230"/>
              <col width="220"/>
              <col width="160"/>
              <col width="160"/>
              <col width="160"/>
              <thead>
                <tr valign="top">
                  <td>Round</td>
                  <td>Sentences in training set (n)</td>
                  <td>Sentences in test set (n)</td>
                  <td>Accuracy (%)</td>
                  <td>Precision (%)</td>
                  <td>Recall (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>0</td>
                  <td>6024</td>
                  <td>837</td>
                  <td>64.5</td>
                  <td>58.0</td>
                  <td>67.4</td>
                </tr>
                <tr valign="top">
                  <td>1</td>
                  <td>7536</td>
                  <td>1047</td>
                  <td>67.7</td>
                  <td>61.2</td>
                  <td>71.6</td>
                </tr>
                <tr valign="top">
                  <td>2</td>
                  <td>8804</td>
                  <td>1223</td>
                  <td>67.7</td>
                  <td>60.3</td>
                  <td>66.3</td>
                </tr>
                <tr valign="top">
                  <td>3</td>
                  <td>10,284</td>
                  <td>1429</td>
                  <td>65.4</td>
                  <td>60.0</td>
                  <td>68.8</td>
                </tr>
                <tr valign="top">
                  <td>4</td>
                  <td>11,861</td>
                  <td>1648</td>
                  <td>71.0</td>
                  <td>61.0</td>
                  <td>67.8</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Cause and Effect Detection</title>
          <p>After having identified the causal sentences, the cause-effect models were trained to extract the specific cause-effect pairs. The active learning strategy led to an extended data set of 2118 causal sentences, that is, containing both cause and effect, of which 10% were used as a test set while the remaining 90% were further used to create a training and validation set with an 80:20 split. The performances of the different cause-effect models are listed in <xref ref-type="table" rid="table3">Table 3</xref>. The best performing model was the CRF model with BERT-embedding features (WE_BERT_CRF) with a precision, recall, and F1 score of 0.68. Surprisingly, it outperforms fine-tuning a BERT model, which is considered the gold standard of current named-entity recognition tasks. A potential explanation for this is that BERT-based models make local decisions at every point of the sequence, taking the neighboring words into account before each decision. In a situation like ours, with strong uncertainty on all elements, owing to the complexity of the task, a single CRF layer model leveraging BERT features, making global decisions using the local context of each word, better maximizes the probability of the whole sequence of decisions. Moreover, the CRF model with simpler FastText embeddings achieved strong results as well, with one reason being probably that the word embeddings were specifically trained on this diabetes corpus.</p>
          <p>Consequently, the WE_BERT_CRF model was applied on all causal sentences leading to a data set of 96,676 sentences with the <italic>cause</italic> and associated <italic>effect</italic> predicted.</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Performance measures for each of the 3 architectures.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="470"/>
              <col width="0"/>
              <col width="170"/>
              <col width="0"/>
              <col width="170"/>
              <col width="0"/>
              <col width="160"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Models</td>
                  <td colspan="2">Precision</td>
                  <td colspan="2">Recall</td>
                  <td>F1 score</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="8">
                    <bold>BERT_FFL</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>I-C</td>
                  <td colspan="2">0.48</td>
                  <td colspan="2">0.46</td>
                  <td colspan="2">0.47</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>I-E</td>
                  <td colspan="2">0.20</td>
                  <td colspan="2">0.48</td>
                  <td colspan="2">0.29</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>O</td>
                  <td colspan="2">0.91</td>
                  <td colspan="2">0.77</td>
                  <td colspan="2">0.83</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>macro</td>
                  <td colspan="2">0.53</td>
                  <td colspan="2">0.57</td>
                  <td colspan="2">0.53</td>
                </tr>
                <tr valign="top">
                  <td colspan="8">
                    <bold>WE_BERT_CRF</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>I-C</td>
                  <td colspan="2">0.63</td>
                  <td colspan="2">0.61</td>
                  <td colspan="2">0.62</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>I-E</td>
                  <td colspan="2">0.49</td>
                  <td colspan="2">0.49</td>
                  <td colspan="2">0.49</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>O</td>
                  <td colspan="2">0.93</td>
                  <td colspan="2">0.93</td>
                  <td colspan="2">0.93</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>macro</td>
                  <td colspan="2">0.68</td>
                  <td colspan="2">0.68</td>
                  <td colspan="2">0.68</td>
                </tr>
                <tr valign="top">
                  <td colspan="8">
                    <bold>FastText_CRF</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>I-C</td>
                  <td colspan="2">0.59</td>
                  <td colspan="2">0.57</td>
                  <td colspan="2">0.58</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>I-E</td>
                  <td colspan="2">0.45</td>
                  <td colspan="2">0.38</td>
                  <td colspan="2">0.41</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>O</td>
                  <td colspan="2">0.92</td>
                  <td colspan="2">0.94</td>
                  <td colspan="2">0.93</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>macro</td>
                  <td colspan="2">0.65</td>
                  <td colspan="2">0.63</td>
                  <td colspan="2">0.64</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Cause-Effect Description</title>
        <p>The semisupervised clustering led to 1751 clusters. To remove noisy clusters through potential misclassifications, only clusters with a minimal number of 10 cause/effect occurrences were considered for the following analyses, resulting in 763 clusters. Note that the order of documents might affect the results, as different clusters might have been created. Please refer to <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> for an overview of the 100 largest clusters (automatically added clusters have “other” as “parent cluster”).</p>
        <p><xref ref-type="table" rid="table4">Table 4</xref> provides an overview of the largest clusters, containing either cause or effect. <xref ref-type="table" rid="table5">Table 5</xref> provides the most frequent cause-effect associations, excluding the largest cluster “diabetes,” as it will be studied separately. The cluster “diabetes” is the largest one with 66,775 occurrences of “diabetes” as either cause or effect (eg, diabetes, #diabetes, diabetes mellitus) followed by “death” with 16,989 (eg, passed away, killed, died, suicide) and “insulin” (eg, insulin, insulin hormone) with 14,148 occurrences. From the 30 largest clusters, 6 refer to nutrition, 4 to diabetes, and 3 to each of insulin, emotions, and the health care system. The most frequent cause-effect association is “unable to afford insulin,” which causes “death” expressed in 1246 cases, followed by “insulin” causing “death” with 1156 cases and “type 1 diabetes” causing “fear” with 1054 cases.</p>
        <p>The largest cluster “diabetes” mainly occurs as a cause and its 10 most frequent effects are death (n=7446), fear (n=4836), sick (n=2799), neuropathy (n=2477), hypoglycemia (n=2062), anger (n=1908), suffer (n=1808), insulin (n=1605), overweight (n=1506), and reduce weight (n=1487). From the 30 most numerous effects for “diabetes,” 6 were related to “nutrition” and 5 to “complications and comorbidities” and 3 to each of “diabetes distress,” “emotions,” and “health care system.”</p>
        <p>The interactive visualization in D3 with filter options is published in [<xref ref-type="bibr" rid="ref52">52</xref>]. <xref rid="figure5" ref-type="fig">Figure 5</xref> provides an example graph of this visualization showing only cause-effect relationships with at least 250 occurrences to ensure readability. It is striking that “death” seems to play such a central role as <italic>effect</italic> with various causes (unable to afford insulin, rationing insulin, finance, insulin, type 1 diabetes, overweight) pointing at it. Other central nodes are type 1 diabetes acting as cause for insulin pump, insulin, hypoglycemia (hypo), sickness, finance, and anger, and fear emotions, where the latter has the strongest association, or the node “insulin” mostly relating as cause for sickness, medication, finance, death, or hypoglycemia and fear and anger.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The most frequent clusters (causes and effects) with the number of occurrences.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="430"/>
            <col width="430"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Parent cluster</td>
                <td>Cluster</td>
                <td>Value (n)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Diabetes</td>
                <td>diabetes</td>
                <td>66,775</td>
              </tr>
              <tr valign="top">
                <td>Death</td>
                <td>death</td>
                <td>16,989</td>
              </tr>
              <tr valign="top">
                <td>Insulin</td>
                <td>insulin</td>
                <td>14,148</td>
              </tr>
              <tr valign="top">
                <td>Diabetes</td>
                <td>type 1 diabetes</td>
                <td>11,693</td>
              </tr>
              <tr valign="top">
                <td>Emotions</td>
                <td>fear</td>
                <td>10,160</td>
              </tr>
              <tr valign="top">
                <td>Glycemic variability</td>
                <td>hypoglycemia</td>
                <td>9547</td>
              </tr>
              <tr valign="top">
                <td>Symptoms</td>
                <td>sick</td>
                <td>6549</td>
              </tr>
              <tr valign="top">
                <td>Nutrition</td>
                <td>overweight</td>
                <td>5186</td>
              </tr>
              <tr valign="top">
                <td>Diabetes</td>
                <td>type 2 diabetes</td>
                <td>4909</td>
              </tr>
              <tr valign="top">
                <td>Complications and comorbidities</td>
                <td>neuropathy</td>
                <td>4481</td>
              </tr>
              <tr valign="top">
                <td>Health care system</td>
                <td>medication</td>
                <td>4389</td>
              </tr>
              <tr valign="top">
                <td>Diabetes Technology</td>
                <td>insulin pump</td>
                <td>4307</td>
              </tr>
              <tr valign="top">
                <td>Nutrition</td>
                <td>nutrition</td>
                <td>4230</td>
              </tr>
              <tr valign="top">
                <td>Emotions</td>
                <td>anger</td>
                <td>4149</td>
              </tr>
              <tr valign="top">
                <td>Health</td>
                <td>oral glucose tolerance test</td>
                <td>4053</td>
              </tr>
              <tr valign="top">
                <td>Blood pressure</td>
                <td>hypertension</td>
                <td>3782</td>
              </tr>
              <tr valign="top">
                <td>Health care system</td>
                <td>finance</td>
                <td>3767</td>
              </tr>
              <tr valign="top">
                <td>Nutrition</td>
                <td>reduce weight</td>
                <td>3589</td>
              </tr>
              <tr valign="top">
                <td>Insulin</td>
                <td>unable to afford insulin</td>
                <td>3381</td>
              </tr>
              <tr valign="top">
                <td>Nutrition</td>
                <td>diet</td>
                <td>3325</td>
              </tr>
              <tr valign="top">
                <td>Emotions</td>
                <td>sadness</td>
                <td>3153</td>
              </tr>
              <tr valign="top">
                <td>Glycemic variability</td>
                <td>hyperglycemia</td>
                <td>3144</td>
              </tr>
              <tr valign="top">
                <td>Diabetes</td>
                <td>suffer</td>
                <td>3132</td>
              </tr>
              <tr valign="top">
                <td>Diabetes Distress</td>
                <td>depression</td>
                <td>2810</td>
              </tr>
              <tr valign="top">
                <td>Health care system</td>
                <td>hospital</td>
                <td>2721</td>
              </tr>
              <tr valign="top">
                <td>Diabetes Distress</td>
                <td>stress</td>
                <td>2681</td>
              </tr>
              <tr valign="top">
                <td>Nutrition</td>
                <td>sugar</td>
                <td>2369</td>
              </tr>
              <tr valign="top">
                <td>Nutrition</td>
                <td>fasting</td>
                <td>2363</td>
              </tr>
              <tr valign="top">
                <td>Insulin</td>
                <td>rationing insulin</td>
                <td>2244</td>
              </tr>
              <tr valign="top">
                <td>Health</td>
                <td>gestational diabetes</td>
                <td>2076</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>The most frequent cause-effect relationships excluding the cluster “diabetes” with the number of occurrences.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="430"/>
            <col width="430"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Cause</td>
                <td>Effect</td>
                <td>Value (n)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>unable to afford insulin</td>
                <td>death</td>
                <td>1246</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>death</td>
                <td>1156</td>
              </tr>
              <tr valign="top">
                <td>type 1 diabetes</td>
                <td>fear</td>
                <td>1054</td>
              </tr>
              <tr valign="top">
                <td>type 1 diabetes</td>
                <td>death</td>
                <td>999</td>
              </tr>
              <tr valign="top">
                <td>rationing insulin</td>
                <td>death</td>
                <td>805</td>
              </tr>
              <tr valign="top">
                <td>type 1 diabetes</td>
                <td>insulin</td>
                <td>751</td>
              </tr>
              <tr valign="top">
                <td>oral glucose tolerance test</td>
                <td>sick</td>
                <td>584</td>
              </tr>
              <tr valign="top">
                <td>type 1 diabetes</td>
                <td>hypoglycemia</td>
                <td>578</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>hypo</td>
                <td>545</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>fear</td>
                <td>534</td>
              </tr>
              <tr valign="top">
                <td>type 1 diabetes</td>
                <td>insulin pump</td>
                <td>436</td>
              </tr>
              <tr valign="top">
                <td>finance</td>
                <td>death</td>
                <td>423</td>
              </tr>
              <tr valign="top">
                <td>type 1 diabetes</td>
                <td>sick</td>
                <td>400</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>sick</td>
                <td>385</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>finance</td>
                <td>367</td>
              </tr>
              <tr valign="top">
                <td>type 1 diabetes</td>
                <td>anger</td>
                <td>356</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>medication</td>
                <td>305</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>anger</td>
                <td>296</td>
              </tr>
              <tr valign="top">
                <td>oral glucose tolerance test</td>
                <td>fear</td>
                <td>293</td>
              </tr>
              <tr valign="top">
                <td>type 2 diabetes</td>
                <td>death</td>
                <td>293</td>
              </tr>
              <tr valign="top">
                <td>type 2 diabetes</td>
                <td>fear</td>
                <td>290</td>
              </tr>
              <tr valign="top">
                <td>hypertension</td>
                <td>death</td>
                <td>286</td>
              </tr>
              <tr valign="top">
                <td>overweight</td>
                <td>death</td>
                <td>280</td>
              </tr>
              <tr valign="top">
                <td>type 1 diabetes</td>
                <td>finance</td>
                <td>277</td>
              </tr>
              <tr valign="top">
                <td>hypoglycemia</td>
                <td>insulin</td>
                <td>272</td>
              </tr>
              <tr valign="top">
                <td>hypoglycemia</td>
                <td>sick</td>
                <td>263</td>
              </tr>
              <tr valign="top">
                <td>affordable insulin</td>
                <td>death</td>
                <td>262</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>insulin pump</td>
                <td>255</td>
              </tr>
              <tr valign="top">
                <td>complications</td>
                <td>death</td>
                <td>248</td>
              </tr>
              <tr valign="top">
                <td>insulin</td>
                <td>sadness</td>
                <td>240</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Cause-effect network with a minimum number of associations (edges) of 250. Accessible in [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
          </caption>
          <graphic xlink:href="medinform_v10i7e37201_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our findings suggest that it is feasible to extract both explicit and implicit causes and associated effects from diabetes-related Twitter data. We demonstrated that by adopting the transfer learning paradigm and fine-tuning a pretrained language model, we were able to detect causal sentences. Moreover, we have shown that simply fine-tuning a BERT-based model does not always outperform more traditional methods such as relying on CRFs in the case of the cause-effect pair detection. The precision, recall, and F1 scores, given the challenging task and the imbalanced data set, were satisfying. The semisupervised clustering and interactive visualization enabled us to identify “diabetes” as the largest cluster acting mainly as the cause for “death” and “fear.” Besides, a central cluster was detected in “death” acting as an effect for various causes related to insulin pricing—a link that was already detected in earlier works [<xref ref-type="bibr" rid="ref9">9</xref>]. From a patient’s perspective, we were able to show that their main fear is insulin pricing, which is expressed in the most frequent cause-effect relationship “unable to afford insulin” causing “death” or “rationing insulin” causing “death.” As the main diabetes distress–related causes, we identified fear of hypoglycemia, insulin, hypertension, or the oral glucose tolerance test.</p>
      </sec>
      <sec>
        <title>Comparison With Previous Works</title>
        <p>Several former works have addressed causality on Twitter data. Doan et al [<xref ref-type="bibr" rid="ref14">14</xref>] focused on 3 health-related concepts, namely, stress, insomnia, and headache as effects and identified causes by using manually crafted patterns and rules. However, they only focused on explicit causality and excluded causes and effects encoded in hashtags and synonymous expressions [<xref ref-type="bibr" rid="ref14">14</xref>]. On the contrary, we tackled both explicit and implicit causality, including causes and effects in hashtags and exploiting synonymous expressions through the use of word embeddings. Kayesh et al [<xref ref-type="bibr" rid="ref16">16</xref>] proposed an innovative approach, a novel technique based on neural networks, which uses common sense background knowledge to enhance the feature set, but they focused on the simplified version of explicit causality in tweets. Bollegala et al [<xref ref-type="bibr" rid="ref53">53</xref>] developed a causality-sensitive approach for detecting adverse drug reactions from social media by using lexical patterns and thereby aiming at explicit causality. Dasgupta et al [<xref ref-type="bibr" rid="ref54">54</xref>] proposed one of the few deep learning approaches due to the unavailability of appropriate training data, leveraging a recursive neural network architecture to detect cause-effect relations from text, but they also only targeted explicit causality. A BERT-based approach tackling both explicit and implicit causality is provided by Khetan et al [<xref ref-type="bibr" rid="ref23">23</xref>] who used already existing labeled corpora not based on social media data. Recently, they further extended their work of explicit and implicit causality understanding in single and multiple sentences but in clinical notes [<xref ref-type="bibr" rid="ref55">55</xref>]. 
To the best of our knowledge, this is the first paper investigating both explicit and implicit cause-effect relationships on diabetes-related Twitter data.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>This study demonstrates various strengths. First, by leveraging powerful language models, we were able to identify a large number of tweets containing <italic>cause-effect</italic> relationships, which enabled us to detect cause-effect associations in 20% (96,676/482,583) of the sentences, contrary to other approaches that were able to identify causality in less than 2% of tweets [<xref ref-type="bibr" rid="ref14">14</xref>]. Second, contrary to most previous work, we tackled both explicit and implicit <italic>causal relationships</italic>, an additional explanation for the higher number of <italic>cause-effect</italic> associations we obtained, compared to other studies focusing only on explicit associations [<xref ref-type="bibr" rid="ref14">14</xref>]. Third, relying fully on automatic machine learning algorithms spared us from defining manually crafted patterns to detect causal associations. Fourth, operating on social media data that are expressed spontaneously and in real time offers the opportunity to gain knowledge from an alternative data source and, in particular, from a patient’s perspective, which might complement traditional epidemiological data sources. Lastly, the data-driven approach to identify cause-effect relationships, as reported by Twitter users, can be used in the next step to generate new hypotheses that can be tested in a more clinical setting, for example, in a clinical trial.</p>
        <p>A strong limitation is that the <italic>cause-effect</italic> relations are merely expressed in tweets and thus cannot be used for causal inference, as the Twitter data source is uncertain and the information shared can be an opinion or an observation. Another shortcoming is that the performance of our algorithms to detect <italic>cause-effect</italic> pairs is not perfect. However, the overall process and the vast amount of data minimize this issue. The lack of recall is counterbalanced by the sheer amount of data, and the lack of precision is counterbalanced by the clustering approach in which nonfrequent causes or effects are discarded [<xref ref-type="bibr" rid="ref56">56</xref>]. Labeling causes and effects in a data set is a highly complicated task, and we would like to emphasize that mislabeling in the data set may occur. Here, the actual prevalence of causal sentences is lower, as we wanted to catch as many causal sentences as possible, which led to also having captured some noncausal sentences. Enhancing data quality certainly is a strong point to address to further improve performance. The causal association structures learnt by the model from the training set might not generalize completely when applied to the large amount of Twitter data. Besides, the active learning strategy certainly added noise to the model, as only positive samples were corrected, which could be improved in future investigations. Moreover, we would like to highlight that the diabetes-related information shared on Twitter may not be representative of all people with diabetes. For instance, we observed a bigger cluster of causes/effects related to type 1 diabetes compared to that related to type 2 diabetes, which is contrary to that in the real world [<xref ref-type="bibr" rid="ref57">57</xref>]. A potential explanation for that is the age distribution of Twitter users [<xref ref-type="bibr" rid="ref58">58</xref>]. 
However, owing to the large number of tweets analyzed, a significant variability in the tweets could be observed.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>In this work, we developed an innovative methodology to identify possible cause-effect relationships among diabetes-related tweets. This task was challenging owing to addressing both explicit and implicit causality, multiword entities, the fact that a word could be either a cause or an effect, the open domain of causes and effects, the biases occurring during labeling of causality, and the relatively small data set for this complex task. We overcame these challenges by augmenting the small data set via an active learning loop. The feasibility of our approach was demonstrated using modern BERT-based architectures in the preprocessing and causal sentence detection. A combination of BERT features and a CRF layer was leveraged to extract causes and effects in diabetes-related tweets, which were then aggregated to clusters in a semisupervised approach. The visualization of the cause-effect network based on Twitter data can deepen our understanding of diabetes, in a way of directly capturing patient-reported outcomes from a causal perspective. The fear of death owing to the inability to afford insulin was the main concern expressed.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>List of diabetes-related keywords for the Twitter application programming interface tweet extraction.</p>
        <media xlink:href="medinform_v10i7e37201_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 47 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Preprocessing pipeline.</p>
        <media xlink:href="medinform_v10i7e37201_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 64 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Annotation guidelines.</p>
        <media xlink:href="medinform_v10i7e37201_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 120 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Most frequent clusters.</p>
        <media xlink:href="medinform_v10i7e37201_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 75 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BIO</term>
          <def>
            <p>Beginning, Inside, Outside</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the MSDAVENIR Foundation, the French Speaking Diabetes Society, and the Luxembourg Institute of Health. These study sponsors had no role in the design or the interpretation of the results of this study. AA, FO, and TC are supported by Epiconcept Company. Epiconcept was involved in the data collection and writing of the report. No study sponsor influenced the decision to submit the paper for publication.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hessler</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Polonsky</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Mullan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>When is diabetes distress clinically meaningful?: establishing cut points for the Diabetes Distress Scale</article-title>
          <source>Diabetes Care</source>
          <year>2012</year>
          <month>02</month>
          <volume>35</volume>
          <issue>2</issue>
          <fpage>259</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.2337/dc11-1572</pub-id>
          <pub-id pub-id-type="medline">22228744</pub-id>
          <pub-id pub-id-type="pii">dc11-1572</pub-id>
          <pub-id pub-id-type="pmcid">PMC3263871</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polonsky</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Hessler</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Masharani</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Blumer</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Strycker</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Understanding the sources of diabetes distress in adults with type 1 diabetes</article-title>
          <source>J Diabetes Complications</source>
          <year>2015</year>
          <volume>29</volume>
          <issue>4</issue>
          <fpage>572</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jdiacomp.2015.01.012</pub-id>
          <pub-id pub-id-type="medline">25765489</pub-id>
          <pub-id pub-id-type="pii">S1056-8727(15)00045-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC4414881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coccaro</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Joseph</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wyne</surname>
              <given-names>Kathline</given-names>
            </name>
            <name name-style="western">
              <surname>Drossos</surname>
              <given-names>Tina</given-names>
            </name>
            <name name-style="western">
              <surname>Phillipson</surname>
              <given-names>Louis</given-names>
            </name>
            <name name-style="western">
              <surname>de Groot</surname>
              <given-names>Mary</given-names>
            </name>
          </person-group>
          <article-title>Emotional Regulation and Diabetes Distress in Adults With Type 1 and Type 2 Diabetes</article-title>
          <source>Diabetes Care</source>
          <year>2021</year>
          <month>01</month>
          <volume>44</volume>
          <issue>1</issue>
          <fpage>20</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.2337/dc20-1059</pub-id>
          <pub-id pub-id-type="medline">33444157</pub-id>
          <pub-id pub-id-type="pii">dc20-1059</pub-id>
          <pub-id pub-id-type="pmcid">PMC8742145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carper</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Traeger</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Wexler</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Psaros</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Safren</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>The differential associations of depression and diabetes distress with quality of life domains in type 2 diabetes</article-title>
          <source>J Behav Med</source>
          <year>2014</year>
          <month>06</month>
          <volume>37</volume>
          <issue>3</issue>
          <fpage>501</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.1007/s10865-013-9505-x</pub-id>
          <pub-id pub-id-type="medline">23515932</pub-id>
          <pub-id pub-id-type="pmcid">PMC3758402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cummings</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Lutes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Littlewood</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>DiNatale</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hambidge</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Schulman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Morisky</surname>
              <given-names>DE</given-names>
            </name>
          </person-group>
          <article-title>Regimen-Related Distress, Medication Adherence, and Glycemic Control in Rural African American Women With Type 2 Diabetes Mellitus</article-title>
          <source>Ann Pharmacother</source>
          <year>2014</year>
          <month>08</month>
          <volume>48</volume>
          <issue>8</issue>
          <fpage>970</fpage>
          <lpage>977</lpage>
          <pub-id pub-id-type="doi">10.1177/1060028014536532</pub-id>
          <pub-id pub-id-type="medline">24904183</pub-id>
          <pub-id pub-id-type="pii">1060028014536532</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mullan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Skaff</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Glasgow</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Arean</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hessler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Predicting diabetes distress in patients with Type 2 diabetes: a longitudinal study</article-title>
          <source>Diabet Med</source>
          <year>2009</year>
          <month>06</month>
          <volume>26</volume>
          <issue>6</issue>
          <fpage>622</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1464-5491.2009.02730.x</pub-id>
          <pub-id pub-id-type="medline">19538238</pub-id>
          <pub-id pub-id-type="pii">DME2730</pub-id>
          <pub-id pub-id-type="pmcid">PMC2740749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pandit</surname>
              <given-names>AU</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Curtis</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Seligman</surname>
              <given-names>HK</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Parker</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Schillinger</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>DeWalt</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Fleming</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mohr</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Disease-related distress, self-care and clinical outcomes among low-income patients with diabetes</article-title>
          <source>J Epidemiol Community Health</source>
          <year>2014</year>
          <month>06</month>
          <volume>68</volume>
          <issue>6</issue>
          <fpage>557</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1136/jech-2013-203063</pub-id>
          <pub-id pub-id-type="medline">24489044</pub-id>
          <pub-id pub-id-type="pii">jech-2013-203063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>van Loon</surname>
              <given-names>BJP</given-names>
            </name>
            <name name-style="western">
              <surname>Vergouwen</surname>
              <given-names>ACM</given-names>
            </name>
            <name name-style="western">
              <surname>Snoek</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Honig</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Systematic review and meta-analysis of psychological interventions in people with diabetes and elevated diabetes-distress</article-title>
          <source>Diabet Med</source>
          <year>2018</year>
          <month>06</month>
          <day>30</day>
          <volume>35</volume>
          <issue>9</issue>
          <fpage>1157</fpage>
          <lpage>1172</lpage>
          <pub-id pub-id-type="doi">10.1111/dme.13709</pub-id>
          <pub-id pub-id-type="medline">29896760</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahne</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Orchard</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Tannier</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Perchoux</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Balkau</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pagoto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Harding</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Czernichow</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Fagherazzi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Insulin pricing and other major diabetes-related concerns in the USA: a study of 46 407 tweets between 2017 and 2019</article-title>
          <source>BMJ Open Diabetes Res Care</source>
          <year>2020</year>
          <month>06</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>e001190</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://drc.bmj.com/lookup/pmidlookup?view=long&#38;pmid=32503810"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjdrc-2020-001190</pub-id>
          <pub-id pub-id-type="medline">32503810</pub-id>
          <pub-id pub-id-type="pii">8/1/e001190</pub-id>
          <pub-id pub-id-type="pmcid">PMC7282343</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Balkhi</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Reid</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>McNamara</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Geffken</surname>
              <given-names>GR</given-names>
            </name>
          </person-group>
          <article-title>The diabetes online community: the importance of forum use in parents of children with type 1 diabetes</article-title>
          <source>Pediatr Diabetes</source>
          <year>2014</year>
          <month>09</month>
          <day>25</day>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>408</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.1111/pedi.12110</pub-id>
          <pub-id pub-id-type="medline">24372986</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A survey on extraction of causal relations from natural language text</article-title>
          <source>ArXiv</source>
          <year>2021</year>
          <month>11</month>
          <day>01</day>
          <access-date>2022-05-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2101.06426">http://arxiv.org/abs/2101.06426</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ritchart</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Perry</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chaparro</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>How Do You #relax When You're #stressed? A Content Analysis and Infodemiology Study of Stress-Related Tweets</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2017</year>
          <month>06</month>
          <day>13</day>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>e35</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2017/2/e35/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.5939</pub-id>
          <pub-id pub-id-type="medline">28611016</pub-id>
          <pub-id pub-id-type="pii">v3i2e35</pub-id>
          <pub-id pub-id-type="pmcid">PMC5487742</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cocos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fiks</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Masino</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for pharmacovigilance: recurrent neural network architectures for labeling adverse drug reactions in Twitter posts</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>07</month>
          <day>01</day>
          <volume>24</volume>
          <issue>4</issue>
          <fpage>813</fpage>
          <lpage>821</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw180</pub-id>
          <pub-id pub-id-type="medline">28339747</pub-id>
          <pub-id pub-id-type="pii">3041102</pub-id>
          <pub-id pub-id-type="pmcid">PMC7651964</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Tilak</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Zisook</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Torii</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Extracting health-related causality from twitter messages using natural language processing</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>04</month>
          <day>04</day>
          <volume>19</volume>
          <issue>Suppl 3</issue>
          <fpage>79</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0785-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0785-0</pub-id>
          <pub-id pub-id-type="medline">30943954</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0785-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6448183</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khoo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Extracting causal knowledge from a medical database using graphical patterns</article-title>
          <year>2000</year>
          <conf-name>Proceedings of the 38th Annual Meeting on Association for Computational Linguistics</conf-name>
          <conf-date>October</conf-date>
          <conf-loc>Hong Kong</conf-loc>
          <fpage>336</fpage>
          <lpage>343</lpage>
          <pub-id pub-id-type="doi">10.3115/1075218.1075261</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kayesh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Islam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>On event causality detection in tweets</article-title>
          <source>ArXiv</source>
          <year>2019</year>
          <access-date>2022-01-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1901.03526">http://arxiv.org/abs/1901.03526</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khoo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>The many facets of the cause-effect relation</article-title>
          <source>The Semantics of Relationships</source>
          <year>2002</year>
          <publisher-loc>Dordrecht</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chowdhury</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing</article-title>
          <source>Ann Rev Info Sci Tech</source>
          <year>2005</year>
          <month>01</month>
          <day>31</day>
          <volume>37</volume>
          <issue>1</issue>
          <fpage>51</fpage>
          <lpage>89</lpage>
          <pub-id pub-id-type="doi">10.1002/aris.1440370103</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El Naqa</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>What is machine learning?</article-title>
          <source>Machine Learning in Radiation Oncology</source>
          <year>2015</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Classifying relations via long short term memory networks along shortest dependency paths</article-title>
          <year>2015</year>
          <conf-name>Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>September</conf-date>
          <conf-loc>Lisbon</conf-loc>
          <fpage>1785</fpage>
          <lpage>1794</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d15-1206</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>de Melo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Relation classification via multi-level attention CNNs</article-title>
          <year>2016</year>
          <conf-name>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>August</conf-date>
          <conf-loc>Berlin</conf-loc>
          <fpage>1298</fpage>
          <lpage>1307</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/P16-1123</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ponti</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Korhonen</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Event-related features in feedforward neural networks contribute to identifying causal relations in discourse</article-title>
          <year>2017</year>
          <conf-name>LSDSem 2017 - 2nd Workshop on Linking Models of Lexical, Sentential and Discourse-Level Semantics, Proceedings of the Workshop</conf-name>
          <conf-date>April</conf-date>
          <conf-loc>Valencia, Spain</conf-loc>
          <fpage>25</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.17863/CAM.9725</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khetan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ramnani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Anand</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sengupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fano</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Arai</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Causal BERT: language models for causality detection between events expressed in text</article-title>
          <source>Intelligent Computing</source>
          <year>2021</year>
          <month>07</month>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
          <fpage>965</fpage>
          <lpage>980</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pretraining of deep bidirectional transformers for language understanding</article-title>
          <source>ArXiv</source>
          <year>2018</year>
          <access-date>2021-05-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1810.04805">http://arxiv.org/abs/1810.04805</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>ArXiv</source>
          <year>2017</year>
          <access-date>2021-09-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1706.03762">http://arxiv.org/abs/1706.03762</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gurulingappa</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rajput</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fluck</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hofmann-Apitius</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Toldo</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Development of a benchmark corpus to support the automatic extraction of drug-related adverse effects from medical case reports</article-title>
          <source>J Biomed Inform</source>
          <year>2012</year>
          <month>10</month>
          <volume>45</volume>
          <issue>5</issue>
          <fpage>885</fpage>
          <lpage>892</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(12)00061-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2012.04.008</pub-id>
          <pub-id pub-id-type="medline">22554702</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(12)00061-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Khoshgoftaar</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A survey of transfer learning</article-title>
          <source>J Big Data</source>
          <year>2016</year>
          <month>05</month>
          <day>28</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1186/s40537-016-0043-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fagherazzi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Challenges and perspectives for the future of diabetes epidemiology in the era of digital health and artificial intelligence</article-title>
          <source>Diabetes Epidemiology and Management</source>
          <year>2021</year>
          <month>01</month>
          <volume>1</volume>
          <fpage>100004</fpage>
          <pub-id pub-id-type="doi">10.1016/j.deman.2021.100004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fagherazzi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ahne</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Guillot</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Riveline</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bonnet</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mebarki</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schuck</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Czernichow</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jeannerod</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Orchard</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Étude mondiale de la détresse liée au diabète : le potentiel du réseau social Twitter pour la recherche médicale</article-title>
          <source>Revue d'Épidémiologie et de Santé Publique</source>
          <year>2018</year>
          <month>06</month>
          <volume>66</volume>
          <fpage>S197</fpage>
          <lpage>S198</lpage>
          <pub-id pub-id-type="doi">10.1016/j.respe.2018.04.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <article-title>Twitter privacy policy</article-title>
          <source>Twitter</source>
          <year>2021</year>
          <access-date>2021-07-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://twitter.com/en/privacy">https://twitter.com/en/privacy</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hanauer</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Use of Social Media in the Diabetes Community: An Exploratory Analysis of Diabetes-Related Tweets</article-title>
          <source>JMIR Diabetes</source>
          <year>2016</year>
          <month>11</month>
          <day>07</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>e4</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://diabetes.jmir.org/2016/2/e4/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/diabetes.6256</pub-id>
          <pub-id pub-id-type="medline">30291053</pub-id>
          <pub-id pub-id-type="pii">v1i2e4</pub-id>
          <pub-id pub-id-type="pmcid">PMC6238851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Vu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>BERTweet: A pretrained language model for English tweets</article-title>
          <source>ArXiv</source>
          <year>2020</year>
          <access-date>2021-12-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2005.10200">http://arxiv.org/abs/2005.10200</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Debut</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sanh</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>HuggingFace's transformers: state-of-the-art natural language processing</article-title>
          <source>ArXiv</source>
          <year>2020</year>
          <access-date>2021-12-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1910.03771">http://arxiv.org/abs/1910.03771</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Parrott</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <source>Emotions in Social Psychology: Essential Readings</source>
          <year>2001</year>
          <publisher-loc>Hove, East Sussex, United Kingdom</publisher-loc>
          <publisher-name>Psychology Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Polonsky</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lohrer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Welch</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobson</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Aponte</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>CE</given-names>
            </name>
          </person-group>
          <article-title>Assessment of diabetes-related distress</article-title>
          <source>Diabetes Care</source>
          <year>1995</year>
          <month>06</month>
          <volume>18</volume>
          <issue>6</issue>
          <fpage>754</fpage>
          <lpage>760</lpage>
          <pub-id pub-id-type="doi">10.2337/diacare.18.6.754</pub-id>
          <pub-id pub-id-type="medline">7555499</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Polonsky</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Earles</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dudl</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lees</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mullan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>Assessing psychosocial distress in diabetes: development of the diabetes distress scale</article-title>
          <source>Diabetes Care</source>
          <year>2005</year>
          <month>03</month>
          <volume>28</volume>
          <issue>3</issue>
          <fpage>626</fpage>
          <lpage>631</lpage>
          <pub-id pub-id-type="doi">10.2337/diacare.28.3.626</pub-id>
          <pub-id pub-id-type="medline">15735199</pub-id>
          <pub-id pub-id-type="pii">28/3/626</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beguerisse-Díaz</surname>
              <given-names>Mariano</given-names>
            </name>
            <name name-style="western">
              <surname>McLennan</surname>
              <given-names>Amy K</given-names>
            </name>
            <name name-style="western">
              <surname>Garduño-Hernández</surname>
              <given-names>Guillermo</given-names>
            </name>
            <name name-style="western">
              <surname>Barahona</surname>
              <given-names>Mauricio</given-names>
            </name>
            <name name-style="western">
              <surname>Ulijaszek</surname>
              <given-names>Stanley J</given-names>
            </name>
          </person-group>
          <article-title>The 'who' and 'what' of #diabetes on Twitter</article-title>
          <source>Digit Health</source>
          <year>2017</year>
          <volume>3</volume>
          <fpage>2055207616688841</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/2055207616688841?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/2055207616688841</pub-id>
          <pub-id pub-id-type="medline">29942579</pub-id>
          <pub-id pub-id-type="pii">10.1177_2055207616688841</pub-id>
          <pub-id pub-id-type="pmcid">PMC6001201</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnsen</surname>
              <given-names>Jan-Are K</given-names>
            </name>
            <name name-style="western">
              <surname>Eggesvik</surname>
              <given-names>Trude B</given-names>
            </name>
            <name name-style="western">
              <surname>Rørvik</surname>
              <given-names>Thea H</given-names>
            </name>
            <name name-style="western">
              <surname>Hanssen</surname>
              <given-names>Miriam W</given-names>
            </name>
            <name name-style="western">
              <surname>Wynn</surname>
              <given-names>Rolf</given-names>
            </name>
            <name name-style="western">
              <surname>Kummervold</surname>
              <given-names>Per Egil</given-names>
            </name>
          </person-group>
          <article-title>Differences in Emotional and Pain-Related Language in Tweets About Dentists and Medical Doctors: Text Analysis of Twitter Content</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2019</year>
          <month>02</month>
          <day>06</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>e10432</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2019/1/e10432/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10432</pub-id>
          <pub-id pub-id-type="medline">30724738</pub-id>
          <pub-id pub-id-type="pii">v5i1e10432</pub-id>
          <pub-id pub-id-type="pmcid">PMC6381402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: A robustly optimized BERT pretraining approach</article-title>
          <source>ArXiv</source>
          <year>2019</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692">https://arxiv.org/abs/1907.11692</ext-link>
          </comment>
          <pub-id pub-id-type="doi">10.48550/ARXIV.1907.11692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Princeton University</collab>
          </person-group>
          <source>About WordNet</source>
          <year>2010</year>
          <access-date>2019-04-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://wordnet.princeton.edu/">https://wordnet.princeton.edu/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A Coefficient of Agreement for Nominal Scales</article-title>
          <source>Educational and Psychological Measurement</source>
          <year>1960</year>
          <month>04</month>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1177/001316446002000104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Practical Statistics for Medical Research</source>
          <year>1990</year>
          <publisher-loc>United Kingdom</publisher-loc>
          <publisher-name>Chapman &#38; Hall</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Landis</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Koch</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>The Measurement of Observer Agreement for Categorical Data</article-title>
          <source>Biometrics</source>
          <year>1977</year>
          <month>03</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>159</fpage>
          <pub-id pub-id-type="doi">10.2307/2529310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Settles</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>Active Learning Literature Survey</source>
          <year>2010</year>
          <access-date>2021-08-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://burrsettles.com/pub/settles.activelearning.pdf?source=post_page">http://burrsettles.com/pub/settles.activelearning.pdf?source=post_page</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lease</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Active discriminative text representation learning</article-title>
          <source>ArXiv</source>
          <year>2016</year>
          <access-date>2021-09-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1606.04212">http://arxiv.org/abs/1606.04212</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Koller</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Support vector machine active learning with applications to text classification</article-title>
          <source>J Mach Learn Res</source>
          <year>2001</year>
          <volume>2</volume>
          <fpage>45</fpage>
          <lpage>66</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume2/tong01a/tong01a.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/153244302760185243</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramshaw</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Text chunking using transformation-based learning</article-title>
          <source>ArXiv</source>
          <year>1999</year>
          <access-date>2021-09-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/cmp-lg/9505040">http://arxiv.org/abs/cmp-lg/9505040</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lafferty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Conditional random fields: probabilistic models for segmenting and labeling sequence data</article-title>
          <source>University of Pennsylvania</source>
          <year>2001</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://repository.upenn.edu/cgi/viewcontent.cgi?article=1162&#38;context=cis_papers">https://repository.upenn.edu/cgi/viewcontent.cgi?article=1162&#38;context=cis_papers</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Korobov</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>sklearn-crfsuite</source>
          <access-date>2021-09-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://sklearn-crfsuite.readthedocs.io/en/latest/index.html">https://sklearn-crfsuite.readthedocs.io/en/latest/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Okazaki</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>A fast implementation of conditional random fields (CRFs)</article-title>
          <source>CRFsuite</source>
          <year>2007</year>
          <access-date>2021-10-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.chokkan.org/software/crfsuite/">http://www.chokkan.org/software/crfsuite/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <article-title>WDDS/Causal-associations-diabetes-twitter</article-title>
          <source>GitHub</source>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/WDDS/Causal-associations-diabetes-twitter/">https://github.com/WDDS/Causal-associations-diabetes-twitter/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <article-title>Cause and effect associations in diabetes-related tweets</article-title>
          <source>Adahne</source>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://observablehq.com/@adahne/cause-and-effect-associations-in-diabetes-related-tweets">https://observablehq.com/@adahne/cause-and-effect-associations-in-diabetes-related-tweets</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bollegala</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Maskell</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sloane</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hajne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pirmohamed</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Causality Patterns for Detecting Adverse Drug Reactions From Social Media: Text Mining Approach</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2018</year>
          <month>05</month>
          <day>09</day>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>e51</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2018/2/e51/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.8214</pub-id>
          <pub-id pub-id-type="medline">29743155</pub-id>
          <pub-id pub-id-type="pii">v4i2e51</pub-id>
          <pub-id pub-id-type="pmcid">PMC5966656</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dasgupta</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dey</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Naskar</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Automatic Extraction of Causal Relations from Text using Linguistically Informed Deep Neural Networks</article-title>
          <year>2018</year>
          <conf-name>Proceedings of the 19th Annual SIGdial Meeting on Discourse and Dialogue</conf-name>
          <conf-date>July 12-14, 2018</conf-date>
          <conf-loc>Melbourne</conf-loc>
          <fpage>306</fpage>
          <lpage>316</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.superlectures.com/sigdial2018/automatic-extraction-of-causal-relations-from-text-using-linguistically-informed-deep-neural-networks"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/w18-5035</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khetan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rizvi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Huber</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bartusiak</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sacaleanu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Fano</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>MIMICause: Defining, identifying and predicting types of causal relationships between biomedical concepts from clinical notes</article-title>
          <source>ArXiv</source>
          <access-date>2021-12-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2110.07090">http://arxiv.org/abs/2110.07090</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tannier</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <source>NLP-driven Data Journalism: Time-Aware Mining and Visualization of International Alliances</source>
          <year>2016</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hal.archives-ouvertes.fr/hal-02407145/document">https://hal.archives-ouvertes.fr/hal-02407145/document</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="web">
          <source>International Diabetes Federation Diabetes Atlas, 9th edn</source>
          <year>2019</year>
          <access-date>2021-12-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.diabetesatlas.org">https://www.diabetesatlas.org</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="web">
          <article-title>Percentage of US adults who use Twitter as of February 2021, by age group</article-title>
          <source>Statista</source>
          <year>2021</year>
          <access-date>2021-10-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.statista.com/statistics/265647/share-of-us-internet-users-who-use-twitter-by-age-group/">https://www.statista.com/statistics/265647/share-of-us-internet-users-who-use-twitter-by-age-group/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
