<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i7e17958</article-id>
      <article-id pub-id-type="pmid">32723719</article-id>
      <article-id pub-id-type="doi">10.2196/17958</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Depression Risk Prediction for Chinese Microblogs via Deep-Learning Methods: Content Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Bian</surname>
            <given-names>Jiang</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yang</surname>
            <given-names>Xi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhang</surname>
            <given-names>Liangliang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lim</surname>
            <given-names>Gilbert</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Xiaofeng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6259-7971</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Shuai</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5739-9022</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Tao</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7013-5272</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Wanting</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8101-5567</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Zhou</surname>
            <given-names>Yejie</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1618-4903</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>Jie</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4040-3130</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Qingcai</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8473-7293</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Yan</surname>
            <given-names>Jun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2497-5518</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Tang</surname>
            <given-names>Buzhou</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>Harbin Institute of Technology Shenzhen Graduate School</institution>
            <addr-line>L1407</addr-line>
            <addr-line>Shenzhen</addr-line>
            <country>China</country>
            <phone>86 13725525983</phone>
            <email>tangbuzhou@gmail.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0271-8246</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Communication</institution>
        <institution>Shenzhen University</institution>
        <addr-line>Shenzhen</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer Science</institution>
        <institution>Harbin Institute of Technology Shenzhen Graduate School</institution>
        <addr-line>Shenzhen</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Yidu Cloud (Beijing) Technology Co Ltd</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Buzhou Tang <email>tangbuzhou@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>29</day>
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>7</issue>
      <elocation-id>e17958</elocation-id>
      <history>
        <date date-type="received">
          <day>24</day>
          <month>1</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>4</day>
          <month>4</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>30</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>6</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Xiaofeng Wang, Shuai Chen, Tao Li, Wanting Li, Yejie Zhou, Jie Zheng, Qingcai Chen, Jun Yan, Buzhou Tang. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 29.07.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/7/e17958/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Depression is a serious personal and public mental health problem. Self-reporting is the main method used to diagnose depression and to determine the severity of depression. However, it is not easy to discover patients with depression owing to feelings of shame in disclosing or discussing their mental health conditions with others. Moreover, self-reporting is time-consuming, and usually leads to missing a certain number of cases. Therefore, automatic discovery of patients with depression from other sources such as social media has been attracting increasing attention. Social media, as one of the most important daily communication systems, connects large quantities of people, including individuals with depression, and provides a channel to discover patients with depression. In this study, we investigated deep-learning methods for depression risk prediction using data from Chinese microblogs, which have potential to discover more patients with depression and to trace their mental health conditions.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to explore the potential of state-of-the-art deep-learning methods on depression risk prediction from Chinese microblogs.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Deep-learning methods with pretrained language representation models, including bidirectional encoder representations from transformers (BERT), robustly optimized BERT pretraining approach (RoBERTa), and generalized autoregressive pretraining for language understanding (XLNET), were investigated for depression risk prediction, and were compared with previous methods on a manually annotated benchmark dataset. Depression risk was assessed at four levels from 0 to 3, where 0, 1, 2, and 3 denote no inclination, and mild, moderate, and severe depression risk, respectively. The dataset was collected from the Chinese microblog Weibo. We also compared different deep-learning methods with pretrained language representation models in two settings: (1) publicly released pretrained language representation models, and (2) language representation models further pretrained on a large-scale unlabeled dataset collected from Weibo. Precision, recall, and F1 scores were used as performance evaluation measures.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Among the three deep-learning methods, BERT achieved the best performance with a microaveraged F1 score of 0.856. RoBERTa achieved the best performance with a macroaveraged F1 score of 0.424 on depression risk at levels 1, 2, and 3, which represents a new benchmark result on the dataset. The further pretrained language representation models demonstrated improvement over publicly released prediction models.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We applied deep-learning methods with pretrained language representation models to automatically predict depression risk using data from Chinese microblogs. The experimental results showed that the deep-learning methods performed better than previous methods, and have greater potential to discover patients with depression and to trace their mental health conditions.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>depression risk prediction</kwd>
        <kwd>deep learning</kwd>
        <kwd>pretrained language model</kwd>
        <kwd>Chinese microblogs</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Mental health is an important component of personal well-being and public health as reported by the World Health Organization (WHO) [<xref ref-type="bibr" rid="ref1">1</xref>]. Anyone—regardless of gender, financial status, and age—may suffer from mental disorders, among which depression remains the most common form [<xref ref-type="bibr" rid="ref2">2</xref>]. Depression is reported to affect more than 264 million people worldwide according to the WHO’s Comprehensive Mental Health Action Plan 2013-2020 [<xref ref-type="bibr" rid="ref3">3</xref>], and the number has been quickly increasing in recent years [<xref ref-type="bibr" rid="ref4">4</xref>]. Among various depressive illnesses, the lifetime prevalence of major depressive disorders is approximately 16%, and evidence suggests that the incidence is increasing [<xref ref-type="bibr" rid="ref5">5</xref>]. In 1997, the WHO estimated that depression will be the second most debilitating disease by 2020, behind cardiovascular disease [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>Depression is accompanied by a suite of very negative effects, as it can interfere with a person’s daily life and routine. In the short term, depression may reduce an individual’s enjoyment of life, make them withdraw from their family and friends, and ultimately feel lonely. In the long term, prolonged depression may lead to more serious conditions and illnesses. Fortunately, early recognition and treatment are proven to be helpful for people with depression to reduce the negative impacts of the disorder [<xref ref-type="bibr" rid="ref7">7</xref>]. Despite broad developments in medical technology, it remains difficult to diagnose depression due to the particularity of mental disorders [<xref ref-type="bibr" rid="ref8">8</xref>]. Currently, most diagnoses of depressive illness are based on self-reports or self-diagnosis of patients [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. The diagnosis procedures are complex and time-consuming. Moreover, a high proportion of patients with depression cannot be discovered as they do not want to disclose or discuss their mental health conditions with others. Therefore, it is urgent to find methods that can help to discover patients with depression from other channels.</p>
        <p>With the development of information technology, social media has become an important part of people’s daily life. More and more people are using social media platforms such as Twitter, Facebook, and Sina Weibo to share their thoughts, feelings, and emotional status. These social media platforms can provide a huge amount of valuable data for research. Some studies based on social media data such as personalized news recommendation [<xref ref-type="bibr" rid="ref11">11</xref>], public opinion sensing and trend analysis [<xref ref-type="bibr" rid="ref12">12</xref>], disease transmission trend monitoring [<xref ref-type="bibr" rid="ref13">13</xref>], and future patient visits prediction [<xref ref-type="bibr" rid="ref14">14</xref>] have achieved good results. In the case of depression, as social media platforms have become important forums for people with depression to interact with peers within a comfortable emotional distance [<xref ref-type="bibr" rid="ref15">15</xref>], high numbers of patients with depression tend to gather to share their feelings, emotional status, and treatment procedures. Some researchers have attempted to discover patients with depression from social media, such as by predicting depression risk embedded in text from microblogs. Accumulating evidence shows that the language and emotion posted on social media platforms could indicate depression [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>In this study, we investigated the use of deep-learning methods for depression risk prediction from data collected in Chinese microblogs. This study represents an extension of the study of Wang et al [<xref ref-type="bibr" rid="ref16">16</xref>], who presented an annotated dataset of Chinese microblogs for depression risk prediction and compared four machine-learning methods, including the deep-learning method bidirectional encoder representations from transformers (BERT) [<xref ref-type="bibr" rid="ref17">17</xref>]. Here, we further investigated three deep-learning methods with pretrained language representation models, BERT, robustly optimized BERT pretraining approach (RoBERTa) [<xref ref-type="bibr" rid="ref18">18</xref>], and generalized autoregressive pretraining for language understanding (XLNET) [<xref ref-type="bibr" rid="ref19">19</xref>], on the depression dataset and obtained new benchmark results.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>In early studies focused on depression detection, most of the methods applied were rule-based and those based on self-reporting or self-diagnosis. For example, Hamilton [<xref ref-type="bibr" rid="ref20">20</xref>] established a rating scale for depression to help patients with depression evaluate the severity of their depression by themselves according to a self-report. However, these methods always require domain experts to define the rules and are time-consuming. In recent years, with the rapid spread of social media, more and more information about personal daily life is publicly posted on the internet, which can be widely used for health prediction, including depression detection.</p>
        <p>Choudhury et al [<xref ref-type="bibr" rid="ref9">9</xref>] made a major contribution to the field of depression detection from social media by investigating whether social media can be used as a source of information to detect mental illness among individuals as well as within a population. Following this study, several researchers annotated some corpora for automatic depression detection, including depression level prediction. For example, Glen et al [<xref ref-type="bibr" rid="ref21">21</xref>] constructed an annotated corpus composed of 1746 users collected from Twitter for depression detection. In the corpus, the users were divided into three groups: depression users, posttraumatic stress disorder (PTSD) users, and control users. This corpus was used as the dataset of the Computational Linguistics and Clinical Psychology (CLPsych) shared task in 2015 [<xref ref-type="bibr" rid="ref22">22</xref>] to predict PTSD users from the control group, users with depression from the control group, and users with depression among users with PTSD. The system that ranked first in the CLPsych 2015 shared task was a combination system composed of 16 support vector machine (SVM)-based subsystems based on features derived using supervised linear discriminant analysis [<xref ref-type="bibr" rid="ref23">23</xref>], supervised Anchor (for topic modeling), and lexical term frequency-inverse document frequency [<xref ref-type="bibr" rid="ref24">24</xref>]. Cacheda et al [<xref ref-type="bibr" rid="ref25">25</xref>] presented a social network analysis and random forest algorithm to detect early depression. Ricard et al [<xref ref-type="bibr" rid="ref26">26</xref>] trained an elastic-net regularized linear regression model on Instagram post captions and comments to detect depression. The features used in the linear regression model included multiple sentiment scores, emoji sentiment analysis results, and metavariables such as the number of “likes” and average comment length. 
Lin et al [<xref ref-type="bibr" rid="ref27">27</xref>] proposed a deep neural network model to detect users’ psychological stress by incorporating two different types of user-scope attributes, and evaluated the model on four different datasets from major microblog platforms, including Sina Weibo, Tencent Weibo, and Twitter. Most of these studies focused on user-level depression detection, as summarized by Wongkoblap et al [<xref ref-type="bibr" rid="ref28">28</xref>], and the machine-learning methods used in these studies included SVM, logistic regression, decision trees [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref32">32</xref>], random forest [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], naive Bayes [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], K-nearest neighbor, maximum entropy [<xref ref-type="bibr" rid="ref37">37</xref>], neural network, and deep-learning neural network.</p>
        <p>To analyze social media at a fine-granularity level and track the mental health conditions of patients with depression, some researchers attempted to detect depression at the tweet level. Jamil et al [<xref ref-type="bibr" rid="ref38">38</xref>] constructed two types of datasets from Twitter for depression detection: one annotated at the tweet level consisting of 8753 tweets and the other annotated at the user level consisting of 160 users. The SVM-based system developed on these two datasets performed well at the user level, but not very well at the tweet level. Wang et al [<xref ref-type="bibr" rid="ref16">16</xref>] annotated a dataset from Sina Weibo at the microblog level (equivalent to the tweet level), in which each microblog was labeled with a depression risk ranging from 0 to 3. They compared four machine-learning methods on this dataset, including SVM, convolutional neural network (CNN), long short-term memory network (LSTM), and BERT. The three deep-learning methods (ie, CNN, LSTM, and BERT) significantly outperformed SVM, and BERT showed the best performance among them.</p>
        <p>During the last 2 or 3 years, pretrained language representation models such as BERT, RoBERTa, and XLNET have shown significant performance gains in many natural language processing tasks such as text classification, question answering, and others [<xref ref-type="bibr" rid="ref39">39</xref>]. However, to the best of our knowledge, deep-learning methods with pretrained language representation models have not yet been applied to depression risk prediction.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Dataset</title>
        <p>In this study, we use the dataset provided by Wang et al [<xref ref-type="bibr" rid="ref16">16</xref>], which was collected from the Chinese social media platform Sina Weibo. In this dataset, 13,993 microblogs were annotated with depression risk assessed at four levels from 0 to 3, where 0 indicates no inclination to depression, or only some common pressures such as work, study, and family issues; 1 indicates mild depression, denoting that users express despair with life but do not mention suicide or self-harm; 2 indicates moderate depression, which denotes that users mention suicide or self-harm without stating a specific time or place; and 3 indicates severe depression, which denotes that users mention suicide or self-harm with a specific time or place. A total of 11,835 microblogs were annotated as 0, 1379 microblogs were annotated as 1, 650 microblogs were annotated as 2, and the remaining 129 microblogs were annotated as 3. The distribution of microblogs at different levels was imbalanced. <xref ref-type="table" rid="table1">Table 1</xref> provides examples of the different depression levels. Following Wang et al [<xref ref-type="bibr" rid="ref16">16</xref>], we split the dataset into two parts: a training set of 11,194 microblogs and a test set of 2799 microblogs, as shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Examples of different depression risk levels in the dataset.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="700"/>
            <thead>
              <tr valign="top">
                <td>Depression risk level</td>
                <td> Microblog</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>3</td>
                <td>Weibo: 不出意外的话，我打算死在今年 。<break/> Barring accidents, I plan to commit suicide this year.</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Weibo: 我一直策划着如何自杀，可是放不下的太多了。<break/> I have been planning to commit suicide, but I cannot let go of too many things.</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>Weibo: 如果我累，真的离开了。<break/>If I’m tired, I will leave.</td>
              </tr>
              <tr valign="top">
                <td>0</td>
                <td>Weibo: 吃了个早餐应该能维持今天。<break/> The breakfast I ate should be able to support me today.</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Dataset statistics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="300"/>
            <col width="400"/>
            <thead>
              <tr valign="top">
                <td>Depression level</td>
                <td>Training set (n)</td>
                <td>Test set (n)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>3</td>
                <td>103</td>
                <td>26</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>520</td>
                <td>130</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>1103</td>
                <td>276</td>
              </tr>
              <tr valign="top">
                <td>0</td>
                <td>9468</td>
                <td>2367</td>
              </tr>
              <tr valign="top">
                <td>All</td>
                <td>11,194</td>
                <td>2799</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Deep-Learning Methods Based on Pretrained Language Representation Models</title>
        <sec>
          <title>BERT</title>
          <p>BERT is a language representation model designed to pretrain deep bidirectional representations from unlabeled text by jointly conditioning on both the left and right context in all layers [<xref ref-type="bibr" rid="ref17">17</xref>]. It uses the transformer architecture to capture long-distance dependences in sentences. During pretraining, BERT optimizes the masked language model (MLM) and the next sentence prediction (NSP) task jointly on large-scale unlabeled text. To implement NSP, BERT adds the token [CLS] at the beginning of every sequence. The final hidden state corresponding to the token [CLS] is then used as the aggregate sequence representation for downstream tasks. When the language representation model is pretrained, it can be subsequently fine-tuned for downstream tasks using the labeled data of downstream tasks. BERT achieved better performance on several natural language processing tasks in 2018 [<xref ref-type="bibr" rid="ref17">17</xref>]. In the present study, depression risk prediction was formalized as a classification task; therefore, we simply needed to feed the representation of token [CLS] into an output layer (a fully connected layer) and then fine-tune the whole network.</p>
        </sec>
        <sec>
          <title>RoBERTa</title>
          <p>RoBERTa is an optimized replication version of BERT [<xref ref-type="bibr" rid="ref18">18</xref>]. Compared with BERT, RoBERTa offers the following four improvements during training: (1) training the model for a longer period with larger batches over more data; (2) removing the NSP task; (3) training on longer sequences; and (4) dynamically changing the masking pattern applied to the training data. Based on these improvements, RoBERTa has achieved new state-of-the-art results on many tasks compared with BERT [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
        </sec>
        <sec>
          <title>XLNET</title>
          <p>XLNET is a generalized autoregressive method that takes advantage of both autoregressive language modeling and autoencoding while avoiding their limitations [<xref ref-type="bibr" rid="ref19">19</xref>]. As BERT and its variants (eg, RoBERTa) neglect the dependency between the masked positions and suffer from a pretrain-finetune discrepancy, XLNET adopts a permutation language model instead of MLM to solve the discrepancy problem. For downstream tasks, the fine-tuning procedure of XLNET is similar to that of BERT and RoBERTa.</p>
        </sec>
      </sec>
      <sec>
        <title>Experiments</title>
        <sec>
          <title>Experimental Setup</title>
          <p>We investigated the different deep-learning methods with pretrained language representation models in two settings: (1) publicly released pretrained language representation models and (2) language representation models further pretrained on a large-scale unlabeled dataset collected from Weibo based on (1). The hyperparameters for BERT, RoBERTa, and XLNET for depression risk prediction are listed in <xref ref-type="table" rid="table3">Table 3</xref>. These hyperparameters were obtained by cross-validation.</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Hyperparameters for the deep-learning methods.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="400"/>
              <col width="200"/>
              <col width="200"/>
              <col width="200"/>
              <thead>
                <tr valign="bottom">
                  <td>Parameter</td>
                  <td>BERT<sup>a</sup></td>
                  <td>RoBERTa<sup>b</sup></td>
                  <td>XLNET<sup>c</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Learning rate</td>
                  <td>1e-5</td>
                  <td>1e-5</td>
                  <td>2e-5</td>
                </tr>
                <tr valign="top">
                  <td>Training steps</td>
                  <td>7000</td>
                  <td>7000</td>
                  <td>7000</td>
                </tr>
                <tr valign="top">
                  <td>Maximum length</td>
                  <td>128</td>
                  <td>128</td>
                  <td>128</td>
                </tr>
                <tr valign="top">
                  <td>Batch size</td>
                  <td>16</td>
                  <td>16</td>
                  <td>16</td>
                </tr>
                <tr valign="top">
                  <td>Warm-up steps</td>
                  <td>700</td>
                  <td>700</td>
                  <td>700</td>
                </tr>
                <tr valign="top">
                  <td>Dropout rate</td>
                  <td>0.3</td>
                  <td>0.3</td>
                  <td>0.3</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>BERT: bidirectional encoder representations from transformers.</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>RoBERTa: robustly optimized bidirectional encoder representations from transformers pretraining approach.</p>
              </fn>
              <fn id="table3fn3">
                <p><sup>c</sup>XLNET: generalized autoregressive pretraining for language understanding.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>In-Domain Pretraining</title>
          <p>For in-domain pretraining (IDP), we started from the publicly released pretrained BERT model [<xref ref-type="bibr" rid="ref40">40</xref>], RoBERTa model [<xref ref-type="bibr" rid="ref41">41</xref>], and XLNET model [<xref ref-type="bibr" rid="ref42">42</xref>], and further pretrained them on the same unlabeled Weibo corpus as used by Wang et al [<xref ref-type="bibr" rid="ref16">16</xref>]. The unlabeled corpus contains about 300,000 microblogs. The hyperparameters used during further IDP are listed in <xref ref-type="table" rid="table4">Table 4</xref>. These hyperparameters were optimized by crossvalidation.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Hyperparameters during further in-domain pretraining for the deep-learning methods.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="400"/>
              <col width="200"/>
              <col width="200"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td>Parameter</td>
                  <td>BERT<sup>a</sup></td>
                  <td> RoBERTa<sup>b</sup></td>
                  <td>XLNET<sup>c</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Learning rate</td>
                  <td>2e-5</td>
                  <td>2e-5</td>
                  <td>2e-5</td>
                </tr>
                <tr valign="top">
                  <td>Training steps</td>
                  <td>100,000</td>
                  <td>100,000</td>
                  <td>100,000</td>
                </tr>
                <tr valign="top">
                  <td>Maximum length</td>
                  <td>256</td>
                  <td>256</td>
                  <td>256</td>
                </tr>
                <tr valign="top">
                  <td>Batch size</td>
                  <td>16</td>
                  <td>16</td>
                  <td>16</td>
                </tr>
                <tr valign="top">
                  <td>Warm-up steps</td>
                  <td>10,000</td>
                  <td>10,000</td>
                  <td>10,000</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table4fn1">
                <p><sup>a</sup>BERT: bidirectional encoder representations from transformers.</p>
              </fn>
              <fn id="table4fn2">
                <p><sup>b</sup>RoBERTa: robustly optimized bidirectional encoder representations from transformers pretraining approach.</p>
              </fn>
              <fn id="table4fn3">
                <p><sup>c</sup>XLNET: generalized autoregressive pretraining for language understanding.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Evaluation Criteria</title>
          <p>Micro/macro precision, recall, and the F1 score were used to evaluate the performance of the different deep-learning methods.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table5">Table 5</xref> shows the performance of deep-learning methods with different language representation models. For each deep-learning method, further pretraining the language representation model on the in-domain corpus brought improvement over the publicly released language representation model. Among the three methods, BERT showed the best performance, with the highest microF1 score of 0.856 (BERT_IDP). The microF1 score difference between any two of the three methods was around 1%-2%, which is not satisfactory. Compared with CNN and LSTM, BERT, RoBERTa, and XLNET showed a great advantage.</p>
      <p>Almost all of the deep-learning methods performed the best on level 0 and performed the worst on level 3, which may be caused by data imbalance. For all depression risk levels except for level 0, the deep-learning methods showed different performance rankings. On level 1, RoBERTa_IDP performed the best with an F1 score of 0.422, whereas on level 2, XLNET_IDP achieved the best F1 score of 0.493, and on level 3, XLNET achieved the best F1 score of 0.445.</p>
      <p>As the aim of this study was to discover potential patients with depression, we were more interested in microblogs at levels 1, 2, and 3. Therefore, it is more meaningful to report macro precision, recall, and F1 scores on these three levels, which are shown in <xref ref-type="table" rid="table6">Table 6</xref>, in which the highest values in each column are in italics. The advantage of RoBERTa_IDP for microblog-level depression detection can be clearly seen. The confusion matrices of BERT_IDP, RoBERTa_IDP, and XLNET_IDP are shown in <xref ref-type="table" rid="table7">Table 7</xref>.</p>
      <table-wrap position="float" id="table5">
        <label>Table 5</label>
        <caption>
          <p>Performance of deep-learning methods with different language representation models.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="120"/>
          <col width="60"/>
          <col width="60"/>
          <col width="70"/>
          <col width="0"/>
          <col width="60"/>
          <col width="70"/>
          <col width="70"/>
          <col width="0"/>
          <col width="70"/>
          <col width="70"/>
          <col width="70"/>
          <col width="0"/>
          <col width="70"/>
          <col width="60"/>
          <col width="60"/>
          <col width="0"/>
          <col width="90"/>
          <thead>
            <tr valign="top">
              <td>Model</td>
              <td colspan="4">Level-0</td>
              <td colspan="4">Level-1</td>
              <td colspan="4">Level-2</td>
              <td colspan="4">Level-3</td>
              <td>MicroF1</td>
            </tr>
            <tr valign="bottom">
              <td>
                <break/>
              </td>
              <td>P<sup>a</sup></td>
              <td>R<sup>b</sup></td>
              <td>F1</td>
              <td colspan="2">P</td>
              <td>R</td>
              <td>F1</td>
              <td colspan="2">P</td>
              <td>R</td>
              <td>F1</td>
              <td colspan="2">P</td>
              <td>R</td>
              <td>F1</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
          </thead>
          <tbody>
            <tr valign="bottom">
              <td>CNN<sup>c</sup> [<xref ref-type="bibr" rid="ref16">16</xref>]</td>
              <td>0.908</td>
              <td>0.940</td>
              <td>0.924</td>
              <td colspan="2">0.380</td>
              <td>0.236</td>
              <td>0.291</td>
              <td colspan="2">0.351</td>
              <td>0.415</td>
              <td>0.380</td>
              <td colspan="2">0.250</td>
              <td>0.231</td>
              <td>0.240</td>
              <td colspan="2">0.841</td>
            </tr>
            <tr valign="bottom">
              <td>LSTM<sup>d</sup> [<xref ref-type="bibr" rid="ref16">16</xref>]</td>
              <td>0.896</td>
              <td>0.936</td>
              <td>0.916</td>
              <td colspan="2">0.294</td>
              <td>0.288</td>
              <td>0.257</td>
              <td colspan="2">0.324</td>
              <td>0.262</td>
              <td>0.289</td>
              <td colspan="2">0.714</td>
              <td>0.192</td>
              <td>0.303</td>
              <td colspan="2">0.832</td>
            </tr>
            <tr valign="bottom">
              <td> BERT<sup>e</sup> [<xref ref-type="bibr" rid="ref16">16</xref>]</td>
              <td>0.942</td>
              <td>0.894</td>
              <td>0.917</td>
              <td colspan="2">0.323</td>
              <td>0.502</td>
              <td>0.393</td>
              <td colspan="2">0.468</td>
              <td>0.489</td>
              <td>0.478</td>
              <td colspan="2">0.574</td>
              <td>0.152</td>
              <td>0.240</td>
              <td colspan="2">0.834</td>
            </tr>
            <tr valign="bottom">
              <td>BERT_IDP<sup>f</sup> [<xref ref-type="bibr" rid="ref16">16</xref>]</td>
              <td>0.929</td>
              <td>0.938</td>
              <td>
                <italic>0.934</italic>
                <sup>g</sup>
              </td>
              <td colspan="2">0.394</td>
              <td>0.446</td>
              <td>0.418</td>
              <td colspan="2">0.568</td>
              <td>0.385</td>
              <td>0.459</td>
              <td colspan="2">0.667</td>
              <td>0.231</td>
              <td>0.343</td>
              <td colspan="2">
                <italic>0.856</italic>
              </td>
            </tr>
            <tr valign="bottom">
              <td>RoBERTa<sup>h</sup></td>
              <td>0.931</td>
              <td>0.920</td>
              <td>0.925</td>
              <td colspan="2">0.355</td>
              <td>0.464</td>
              <td>0.402</td>
              <td colspan="2">0.556</td>
              <td>0.385</td>
              <td>0.455</td>
              <td colspan="2">0.600</td>
              <td>0.231</td>
              <td>0.333</td>
              <td colspan="2"> 0.843</td>
            </tr>
            <tr valign="bottom">
              <td> RoBERTa_IDP</td>
              <td>0.933</td>
              <td>0.920</td>
              <td>0.926</td>
              <td colspan="2">0.371</td>
              <td>0.489</td>
              <td>
                <italic>0.422</italic>
              </td>
              <td colspan="2">0.578</td>
              <td>0.400</td>
              <td>0.473</td>
              <td colspan="2">0.636</td>
              <td>0.269</td>
              <td>0.333</td>
              <td colspan="2"> 0.847</td>
            </tr>
            <tr valign="bottom">
              <td>XLNET<sup>i</sup></td>
              <td>0.908</td>
              <td>0.948</td>
              <td>0.927</td>
              <td colspan="2">0.358</td>
              <td>0.273</td>
              <td>0.309</td>
              <td colspan="2">0.484</td>
              <td>0.353</td>
              <td>0.408</td>
              <td colspan="2">0.530</td>
              <td>0.384</td>
              <td>
                <italic>0.445</italic>
              </td>
              <td colspan="2">0.848</td>
            </tr>
            <tr valign="bottom">
              <td> XLNET_IDP</td>
              <td>0.933</td>
              <td>0.920</td>
              <td>0.926</td>
              <td colspan="2">0.361</td>
              <td>0.471</td>
              <td>0.409</td>
              <td colspan="2">0.577</td>
              <td>0.431</td>
              <td>
                <italic>0.493</italic>
              </td>
              <td colspan="2">0.625</td>
              <td>0.192</td>
              <td>0.294</td>
              <td colspan="2"> 0.846</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table5fn1">
            <p><sup>a</sup>P: precision.</p>
          </fn>
          <fn id="table5fn2">
            <p><sup>b</sup>R: recall.</p>
          </fn>
          <fn id="table5fn3">
            <p><sup>c</sup>CNN: convolutional neural network.</p>
          </fn>
          <fn id="table5fn4">
            <p><sup>d</sup>LSTM: long short-term memory network.</p>
          </fn>
          <fn id="table5fn5">
            <p><sup>e</sup>BERT: bidirectional encoder representations from transformers.</p>
          </fn>
          <fn id="table5fn6">
            <p><sup>f</sup>_IDP: The model is further trained on the in-domain unlabeled corpus.</p>
          </fn>
          <fn id="table5fn7">
            <p><sup>g</sup>Highest F1 values are indicated in italics.</p>
          </fn>
          <fn id="table5fn8">
            <p><sup>h</sup>RoBERTa: robustly optimized bidirectional encoder representations from transformers pretraining approach.</p>
          </fn>
          <fn id="table5fn9">
            <p><sup>i</sup>XLNET: generalized autoregressive pretraining for language understanding.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table6">
        <label>Table 6</label>
        <caption>
          <p>Performance of deep-learning methods with different language representation models on levels 1, 2, and 3.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="250"/>
          <col width="250"/>
          <col width="250"/>
          <col width="250"/>
          <thead>
            <tr valign="bottom">
              <td>Model</td>
              <td>Macro-F1</td>
              <td>Macro-P<sup>a</sup></td>
              <td>Macro-R<sup>b</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="bottom">
              <td>BERT<sup>c</sup> [<xref ref-type="bibr" rid="ref16">16</xref>]</td>
              <td>0.370</td>
              <td>0.455</td>
              <td>0.381</td>
            </tr>
            <tr valign="bottom">
              <td>BERT_IDP<sup>d</sup> [<xref ref-type="bibr" rid="ref16">16</xref>]</td>
              <td>0.406</td>
              <td>
                <italic>0.543</italic>
                <sup>e</sup>
              </td>
              <td>0.354</td>
            </tr>
            <tr valign="bottom">
              <td>RoBERTa<sup>f</sup></td>
              <td>0.396</td>
              <td>0.503</td>
              <td>0.360</td>
            </tr>
            <tr valign="bottom">
              <td> RoBERTa_IDP</td>
              <td>
                <italic>0.424</italic>
              </td>
              <td> 0.528</td>
              <td>
                <italic>0.386</italic>
              </td>
            </tr>
            <tr valign="bottom">
              <td>XLNET<sup>g</sup></td>
              <td>0.387</td>
              <td>0.457</td>
              <td>0.336</td>
            </tr>
            <tr valign="bottom">
              <td>XLNET_IDP</td>
              <td>0.398</td>
              <td>0.521</td>
              <td>0.364</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table6fn1">
            <p><sup>a</sup>P: precision.</p>
          </fn>
          <fn id="table6fn2">
            <p><sup>b</sup>R: recall.</p>
          </fn>
          <fn id="table6fn3">
            <p><sup>c</sup>BERT: bidirectional encoder representations from transformers.</p>
          </fn>
          <fn id="table6fn4">
            <p><sup>d</sup>_IDP: The model is further trained on the in-domain unlabeled corpus.</p>
          </fn>
          <fn id="table6fn5">
            <p><sup>e</sup>Highest F1 values are indicated in italics.</p>
          </fn>
          <fn id="table6fn6">
            <p><sup>f</sup>RoBERTa: robustly optimized bidirectional encoder representations from transformers pretraining approach.</p>
          </fn>
          <fn id="table6fn7">
            <p><sup>g</sup>XLNET: generalized autoregressive pretraining for language understanding.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table7">
        <label>Table 7</label>
        <caption>
          <p>Confusion matrix of the deep-learning methods with in-domain pretraining.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="170"/>
          <col width="200"/>
          <col width="200"/>
          <col width="200"/>
          <col width="200"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Gold-standard method</td>
              <td>Prediction method Level-0</td>
              <td>Prediction method Level-1</td>
              <td>Prediction method Level-2</td>
              <td>Prediction method Level-3</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="2">
                <bold>BERT_IDP<sup>a</sup></bold>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-0</td>
              <td>2221</td>
              <td>131</td>
              <td>14</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-1</td>
              <td>137</td>
              <td>123</td>
              <td>16</td>
              <td>0</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-2</td>
              <td>26</td>
              <td>52</td>
              <td>50</td>
              <td>2</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-3</td>
              <td>6</td>
              <td>6</td>
              <td>8</td>
              <td>6</td>
            </tr>
            <tr valign="top">
              <td colspan="2">
                <bold>RoBERTa_IDP<sup>b</sup></bold>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-0</td>
              <td>2177</td>
              <td>176</td>
              <td>13</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-1</td>
              <td>128</td>
              <td>135</td>
              <td>15</td>
              <td>0</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-2</td>
              <td>26</td>
              <td>47</td>
              <td>52</td>
              <td>3</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-3</td>
              <td>3</td>
              <td>6</td>
              <td>10</td>
              <td>7</td>
            </tr>
            <tr valign="top">
              <td colspan="2">
                <bold>XLNET_IDP<sup>c</sup></bold>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-0</td>
              <td>2177</td>
              <td>176</td>
              <td>13</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-1</td>
              <td>128</td>
              <td>130</td>
              <td>18</td>
              <td>0</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-2</td>
              <td>26</td>
              <td>46</td>
              <td>56</td>
              <td>2</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Level-3</td>
              <td>3</td>
              <td>8</td>
              <td>10</td>
              <td> 5</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table7fn1">
            <p><sup>a</sup>BERT_IDP: bidirectional encoder representations from transformers further trained on the in-domain unlabeled corpus.</p>
          </fn>
          <fn id="table7fn2">
            <p><sup>b</sup>RoBERTa_IDP: robustly optimized bidirectional encoder representations from transformers pretraining approach further trained on the in-domain unlabeled corpus.</p>
          </fn>
          <fn id="table7fn3">
            <p><sup>c</sup>XLNET_IDP: generalized autoregressive pretraining for language understanding further trained on the in-domain unlabeled corpus.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we have applied three deep-learning methods with pretrained language representation models to predict the depression risk based on data from Chinese microblogs, which is recognized as a text classification task. The deep-learning methods achieved the highest macroaveraged F1 score of 0.424 on the three levels of depression of concern, which represents a new state-of-the-art result from the dataset used by Wang et al [<xref ref-type="bibr" rid="ref16">16</xref>]. These results indicate the potential for tracing mental health conditions of depression patients from microblogs. We also investigated the effect of pretraining language representation models in different settings. These experiments showed that further pretraining the language representation models on a large-scale unlabeled in-domain corpus leads to better performance, which is easily interpretable.</p>
        <p>Error analysis on the deep-learning methods showed that most errors occurred between level 0 and level 1. As shown in the confusion matrix in <xref ref-type="table" rid="table7">Table 7</xref>, among all samples predicted incorrectly by RoBERTa_IDP, 128 gold-standard samples at level 1 were predicted as level 0 and 176 gold-standard samples at level 0 were predicted as level 1. This type of error accounted for about 70% of all errors. The main reason for this phenomenon is that there are many ambiguous words in Chinese microblogs, which are difficult to distinguish independently. These ambiguous words also occurred very frequently in microblogs of high depression risk levels. For example, in the microblog “我已经放下了亲情、友情，都已经和解了，可以安心上路了(I have let go of my family and friendships, and have reconciled with them. Now, I can go on my way with ease),” “上路” is an ambiguous word. In Chinese, this word not only means “going on one’s way” but also has the meaning of passing away. Other examples include “解脱 (extricate)” in “啥时候能够解脱呢？有点期待 (When can I extricate myself from the tough world? I am looking forward to it),” and “黑(black)” in “我看到的世界都是黑的只剩下一片黑 (The world I see is black, only black).” These words are not related to depression risk in most common contexts. However, in the contexts mentioned above, these words indicate the despair of patients in life. Since these words appeared infrequently in the entire depression dataset, it was very difficult for the deep-learning models to learn the multiple meanings of these ambiguous words. From the confusion matrix, we can see that RoBERTa_IDP could correctly classify more samples at a high level than the previous BERT model. This suggests that our new methods can handle these types of errors better than previous methods. For these types of errors, there may be two possible solutions: one is to include more samples containing these ambiguous words to help the models learn the multiple meanings of these words, and the other is to incorporate more of the context from the same user to help the models make a correct prediction.</p>
        <p>In the future, there may be three directions for further improvement. First, we will expand the current dataset to cover as many multiple meanings of ambiguous words as possible. Second, we will attempt to use user-level context to improve microblog-level depression risk prediction. Third, we will try to add medical knowledge regarding depression into the deep-learning methods.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Depression is one of the most harmful mental disorders worldwide. The diagnosis of depression is quite complex and time-consuming. Predicting depression risk automatically is very important and meaningful. In this study, we have focused on the potential of deep-learning methods with pretrained language representation models for depression risk prediction from Chinese microblogs. The experimental results on a benchmark dataset showed that the proposed methods performed well for this task. The main contribution of this study to depression health care is to help discover potential patients with depression from social media quickly. This could help doctors or psychologists to concentrate on providing help for these potential patients with a high depression level.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CLPsych</term>
          <def>
            <p>Computational Linguistics and Clinical Psychology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">IDP</term>
          <def>
            <p>in-domain pretraining</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LSTM</term>
          <def>
            <p>long short-term memory network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MLM</term>
          <def>
            <p>masked language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NSP</term>
          <def>
            <p>next sentence prediction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PTSD</term>
          <def>
            <p>posttraumatic stress disorder</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">RoBERTa</term>
          <def>
            <p>robustly optimized bidirectional encoder representations from transformers pretraining approach</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">XLNET</term>
          <def>
            <p>generalized autoregressive pretraining for language understanding</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study is supported in part by grants from the National Natural Science Foundations of China (U1813215, 61876052, and 61573118), Special Foundation for Technology Research Program of Guangdong Province (2015B010131010), National Natural Science Foundations of Guangdong, China (2019A1515011158), Guangdong Province Covid-19 Pandemic Control Research Fund (2020KZDZX1222), Strategic Emerging Industry Development Special Funds of Shenzhen (JCYJ20180306172232154 and JCYJ20170307150528934), and Innovation Fund of Harbin Institute of Technology (HIT.NSRIF.2017052).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>The work presented herein was carried out with collaboration among all authors. XW, SC, and BT designed the methods and experiments. XW and SC conducted the experiment. All authors analyzed the data and interpreted the results. SC and BT wrote the paper. All authors have approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Promoting mental health: Concepts, emerging evidence, practice: Summary report</article-title>
          <source>World Health Organization</source>
          <year>2004</year>
          <access-date>2020-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/mental_health/evidence/en/promoting_mhh.pdf">https://www.who.int/mental_health/evidence/en/promoting_mhh.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <source>Results from the 2013 National Survey on Drug Use and Health: Mental Health Findings</source>
          <year>2013</year>
          <access-date>2020-07-07</access-date>
          <publisher-name>US Department of Health and Human Services, Substance Abuse and Mental Health Services Administration, Center for Behavioral Health Statistics and Quality</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.samhsa.gov/data/sites/default/files/NSDUHmhfr2013/NSDUHmhfr2013.pdf">https://www.samhsa.gov/data/sites/default/files/NSDUHmhfr2013/NSDUHmhfr2013.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saxena</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chisholm</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>World Health Assembly adopts Comprehensive Mental Health Action Plan 2013-2020</article-title>
          <source>Lancet</source>
          <year>2013</year>
          <month>06</month>
          <day>08</day>
          <volume>381</volume>
          <issue>9882</issue>
          <fpage>1970</fpage>
          <lpage>1971</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(13)61139-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(13)61139-3</pub-id>
          <pub-id pub-id-type="medline">23746771</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(13)61139-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moussavi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chatterji</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Verdes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tandon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ustun</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Depression, chronic diseases, and decrements in health: results from the World Health Surveys</article-title>
          <source>Lancet</source>
          <year>2007</year>
          <month>09</month>
          <day>08</day>
          <volume>370</volume>
          <issue>9590</issue>
          <fpage>851</fpage>
          <lpage>858</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(07)61415-9</pub-id>
          <pub-id pub-id-type="medline">17826170</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(07)61415-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doris</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ebmeier</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shajahan</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Depressive illness</article-title>
          <source>Lancet</source>
          <year>1999</year>
          <month>10</month>
          <volume>354</volume>
          <issue>9187</issue>
          <fpage>1369</fpage>
          <lpage>1375</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(99)03121-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>AD</given-names>
            </name>
          </person-group>
          <article-title>Global mortality, disability, and the contribution of risk factors: Global Burden of Disease Study</article-title>
          <source>Lancet</source>
          <year>1997</year>
          <month>05</month>
          <day>17</day>
          <volume>349</volume>
          <issue>9063</issue>
          <fpage>1436</fpage>
          <lpage>1442</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(96)07495-8</pub-id>
          <pub-id pub-id-type="medline">9164317</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(96)07495-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Picardi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lega</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Tarsitani</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Caredda</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Matteucci</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zerella</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Miglio</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gigantesco</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cerbo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gaddini</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Spandonaro</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Biondi</surname>
              <given-names>M</given-names>
            </name>
            <collab>SET-DEP Group</collab>
          </person-group>
          <article-title>A randomised controlled trial of the effectiveness of a program for early detection and treatment of depression in primary care</article-title>
          <source>J Affect Disord</source>
          <year>2016</year>
          <month>07</month>
          <day>01</day>
          <volume>198</volume>
          <fpage>96</fpage>
          <lpage>101</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jad.2016.03.025</pub-id>
          <pub-id pub-id-type="medline">27015158</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(15)31411-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baik</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bowers</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Oakley</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Susman</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>The recognition of depression: the primary care clinician's perspective</article-title>
          <source>Ann Fam Med</source>
          <year>2005</year>
          <month>01</month>
          <day>01</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.annfammed.org/cgi/pmidlookup?view=long&#38;pmid=15671188"/>
          </comment>
          <pub-id pub-id-type="doi">10.1370/afm.239</pub-id>
          <pub-id pub-id-type="medline">15671188</pub-id>
          <pub-id pub-id-type="pii">3/1/31</pub-id>
          <pub-id pub-id-type="pmcid">PMC1466789</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gamon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Counts</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Horvitz</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Predicting depression via social media</article-title>
          <year>2013</year>
          <month>7</month>
          <day>8</day>
          <conf-name>Proceedings of the seventh international AAAI conference on weblogs and social media</conf-name>
          <conf-date>2013</conf-date>
          <conf-loc>Cambridge, MA, USA</conf-loc>
          <publisher-name>Association for the Advancement of Artificial Intelligence</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sanchez-Villegas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schlatter</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ortuno</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lahortiga</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Pla</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Benito</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez-Gonzalez</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Validity of a self-reported diagnosis of depression among participants in a cohort study using the Structured Clinical Interview for DSM-IV (SCID-I)</article-title>
          <source>BMC Psychiatry</source>
          <year>2008</year>
          <month>6</month>
          <day>17</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>43</fpage>
          <pub-id pub-id-type="doi">10.1186/1471-244x-8-43</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Houben</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Konstan</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Conejo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Marzo</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Oliver</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Analyzing user modeling on twitter for personalized news recommendations</article-title>
          <source>User Modeling, Adaptation and Personalization. UMAP 2011. Lecture Notes in Computer Science, vol. 6787</source>
          <year>2011</year>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mingyi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Renwei</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>A Research on Social Network Information Distribution Pattern With Internet Public Opinion Formation</article-title>
          <source>Journalism Communication</source>
          <year>2009</year>
          <volume>5</volume>
          <fpage>72</fpage>
          <lpage>78</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rothenberg</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Sterk</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Toomey</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Potterat</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schrader</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hatch</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using social network and ethnographic tools to evaluate syphilis transmission</article-title>
          <source>Sex Transm Dis</source>
          <year>1998</year>
          <month>03</month>
          <volume>25</volume>
          <issue>3</issue>
          <fpage>154</fpage>
          <lpage>160</lpage>
          <pub-id pub-id-type="doi">10.1097/00007435-199803000-00009</pub-id>
          <pub-id pub-id-type="medline">9524994</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Impact of Predicting Health Care Utilization Via Web Search Behavior: A Data-Driven Analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <month>09</month>
          <day>21</day>
          <volume>18</volume>
          <issue>9</issue>
          <fpage>e251</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2016/9/e251/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.6240</pub-id>
          <pub-id pub-id-type="medline">27655225</pub-id>
          <pub-id pub-id-type="pii">v18i9e251</pub-id>
          <pub-id pub-id-type="pmcid">PMC5052461</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Colineau</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Paris</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Talking about your health to strangers: understanding the use of online social networks by patients</article-title>
          <source>New Rev Hypermedia Multimed</source>
          <year>2010</year>
          <month>04</month>
          <volume>16</volume>
          <issue>1-2</issue>
          <fpage>141</fpage>
          <lpage>160</lpage>
          <pub-id pub-id-type="doi">10.1080/13614568.2010.496131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Assessing depression risk in Chinese microblogs: a corpus and machine learning methods</article-title>
          <year>2019</year>
          <conf-name>IEEE International Conference on Healthcare Informatics (ICHI)</conf-name>
          <conf-date>June 10-13, 2019</conf-date>
          <conf-loc>Xi'an, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ichi.2019.8904506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv preprint</source>
          <year>2018</year>
          <fpage>1810.04805</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: A robustly optimized bert pretraining approach</article-title>
          <source>arXiv preprint</source>
          <year>2019</year>
          <fpage>1907.11692v1</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carbonell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
          </person-group>
          <article-title>XLNet: Generalized Autoregressive Pretraining for Language Understanding</article-title>
          <source>arXiv preprint</source>
          <year>2019</year>
          <fpage>1906.08237</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Sartorius</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ban</surname>
              <given-names>TA</given-names>
            </name>
          </person-group>
          <article-title>The Hamilton rating scale for depression</article-title>
          <source>Assessment of depression</source>
          <year>1986</year>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer-Verlag</publisher-name>
          <fpage>143</fpage>
          <lpage>152</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Harman</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Quantifying mental health signals in Twitter</article-title>
          <year>2014</year>
          <conf-name>Proceedings of the workshop on computational linguistics and clinical psychology: From linguistic signal to clinical reality</conf-name>
          <conf-date>June 2014</conf-date>
          <conf-loc>Baltimore, MD</conf-loc>
          <fpage>51</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/w14-3207</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Harman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hollingshead</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>CLPsych 2015 shared task: Depression and PTSD on Twitter</article-title>
          <year>2015</year>
          <conf-name>Proceedings of the 2nd Workshop on Computational Linguistics and Clinical Psychology: From Linguistic Signal to Clinical Reality</conf-name>
          <conf-date>June 5, 2015</conf-date>
          <conf-loc>Denver, Colorado</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/w15-1204</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>MI</given-names>
            </name>
          </person-group>
          <article-title>Latent Dirichlet allocation</article-title>
          <source>J Machine Learn Res</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Resnik</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Claudino</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The University of Maryland CLPsych 2015 shared task system</article-title>
          <year>2015</year>
          <month>6</month>
          <day>5</day>
          <conf-name>Proceedings of the 2nd Workshop on Computational Linguistics and Clinical Psychology: From Linguistic Signal to Clinical Reality</conf-name>
          <conf-date>June 5, 2015</conf-date>
          <conf-loc>Denver, Colorado</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/w15-1207</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cacheda</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Novoa</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Carneiro</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Early Detection of Depression: Social Network Analysis and Random Forest Techniques</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>06</month>
          <day>10</day>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>e12554</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/6/e12554/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12554</pub-id>
          <pub-id pub-id-type="medline">31199323</pub-id>
          <pub-id pub-id-type="pii">v21i6e12554</pub-id>
          <pub-id pub-id-type="pmcid">PMC6598420</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ricard</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marsch</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Crosier</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hassanpour</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Exploring the Utility of Community-Generated Social Media Content for Detecting Depression: An Analytical Study on Instagram</article-title>
          <source>J Med Internet Res</source>
          <year>2018</year>
          <month>12</month>
          <day>06</day>
          <volume>20</volume>
          <issue>12</issue>
          <fpage>e11817</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2018/12/e11817/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/11817</pub-id>
          <pub-id pub-id-type="medline">30522991</pub-id>
          <pub-id pub-id-type="pii">v20i12e11817</pub-id>
          <pub-id pub-id-type="pmcid">PMC6302231</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>User-level psychological stress detection from social media using deep neural network</article-title>
          <year>2014</year>
          <month>11</month>
          <day>1</day>
          <conf-name>Proceedings of the 22nd ACM international conference on Multimedia</conf-name>
          <conf-date>November 2014</conf-date>
          <conf-loc>Orlando, FL</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2647868.2654945</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wongkoblap</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vadillo</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Curcin</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Researching Mental Health Disorders in the Era of Social Media: Systematic Review</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>06</month>
          <day>29</day>
          <volume>19</volume>
          <issue>6</issue>
          <fpage>e228</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2017/6/e228/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.7215</pub-id>
          <pub-id pub-id-type="medline">28663166</pub-id>
          <pub-id pub-id-type="pii">v19i6e228</pub-id>
          <pub-id pub-id-type="pmcid">PMC5509952</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burnap</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Colombo</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Scourfield</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Machine classification and analysis of suicide-related communication on twitter</article-title>
          <year>2015</year>
          <month>9</month>
          <day>1</day>
          <conf-name>Proceedings of the 26th ACM Conference on Hypertext &#38; Social Media</conf-name>
          <conf-date>August 2015</conf-date>
          <conf-loc>Guzelyurt, Northern Cyprus</conf-loc>
          <fpage>75</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1145/2700171.2791023</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prieto</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Matos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Álvarez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cacheda</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Oliveira</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Twitter: a good place to detect health conditions</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <month>1</month>
          <day>29</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e86191</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0086191"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0086191</pub-id>
          <pub-id pub-id-type="medline">24489699</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-10567</pub-id>
          <pub-id pub-id-type="pmcid">PMC3906034</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bao</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A depression detection model based on sentiment analysis in micro-blog social network</article-title>
          <source>Trends and Applications in Knowledge Discovery and Data Mining. PAKDD 2013. Lecture Notes in Computer Science, vol 7867</source>
          <year>2013</year>
          <month>4</month>
          <day>14</day>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>An improved model for depression detection in micro-blog social network</article-title>
          <year>2013</year>
          <month>12</month>
          <day>7</day>
          <conf-name>IEEE 13th International Conference on Data Mining Workshops</conf-name>
          <conf-date>December 7-10, 2013</conf-date>
          <conf-loc>Dallas, TX</conf-loc>
          <fpage>2013</fpage>
          <pub-id pub-id-type="doi">10.1109/icdmw.2013.132</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saravia</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>De</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>YS</given-names>
            </name>
          </person-group>
          <article-title>MIDAS: Mental illness detection and analysis via social media</article-title>
          <year>2016</year>
          <month>8</month>
          <day>18</day>
          <conf-name>IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)</conf-name>
          <conf-date>August 18-21, 2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <fpage>2016</fpage>
          <pub-id pub-id-type="doi">10.1109/asonam.2016.7752434</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yip</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Identifying Chinese Microblog Users With High Suicide Probability Using Internet-Based Profile and Linguistic Features: Classification Model</article-title>
          <source>JMIR Ment Health</source>
          <year>2015</year>
          <month>05</month>
          <day>12</day>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e17</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2015/2/e17/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/mental.4227</pub-id>
          <pub-id pub-id-type="medline">26543921</pub-id>
          <pub-id pub-id-type="pii">v2i2e17</pub-id>
          <pub-id pub-id-type="pmcid">PMC4607395</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Brede</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ianni</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Detecting and characterizing eating-disorder communities on social media</article-title>
          <year>2017</year>
          <month>2</month>
          <day>1</day>
          <conf-name>Proceedings of the Tenth ACM International Conference on Web Search and Data Mining</conf-name>
          <conf-date>2017</conf-date>
          <conf-loc>Cambridge, UK</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3018661.3018706</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Rau</surname>
              <given-names>PLP</given-names>
            </name>
          </person-group>
          <article-title>Predicting mental health status on social media</article-title>
          <source>Cross-cultural Design. Cultural Differences in Everyday Life. CCD 2013. Lecture Notes in Computer Science, vol 8024</source>
          <year>2014</year>
          <month>4</month>
          <day>23</day>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hollingshead</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Quantifying the language of schizophrenia in social media</article-title>
          <year>2015</year>
          <month>1</month>
          <day>1</day>
          <conf-name>Proceedings of the 2nd Workshop on Computational Linguistics and Clinical Psychology: From Linguistic Signal to Clinical Reality</conf-name>
          <conf-date>June 5, 2015</conf-date>
          <conf-loc>Denver, CO</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/w15-1202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jamil</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Inkpen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Buddhitha</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Monitoring tweets for depression to detect at-risk users</article-title>
          <year>2018</year>
          <month>8</month>
          <day>1</day>
          <conf-name>Proceedings of the Fourth Workshop on Computational Linguistics and Clinical Psychology: From Linguistic Signal to Clinical Reality</conf-name>
          <conf-date>August 2017</conf-date>
          <conf-loc>Vancouver, BC</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w17-3104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Narasimhan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salimans</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Improving language understanding with unsupervised learning</article-title>
          <source>OpenAI</source>
          <year>2018</year>
          <month>6</month>
          <day>11</day>
          <access-date>2020-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/blog/language-unsupervised/">https://openai.com/blog/language-unsupervised/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <article-title>bert</article-title>
          <source>GitHub</source>
          <access-date>2020-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/google-research/bert">https://github.com/google-research/bert</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <article-title>fairseq</article-title>
          <source>GitHub</source>
          <access-date>2020-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/pytorch/fairseq">https://github.com/pytorch/fairseq</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <article-title>xlnet</article-title>
          <source>GitHub</source>
          <access-date>2020-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/zihangdai/xlnet">https://github.com/zihangdai/xlnet</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
