<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i8e37818</article-id>
      <article-id pub-id-type="pmid">35943770</article-id>
      <article-id pub-id-type="doi">10.2196/37818</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Emotion-Based Reinforcement Attention Network for Depression Detection on Social Media: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gao</surname>
            <given-names>Jianliang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Du</surname>
            <given-names>Yongping</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Torii</surname>
            <given-names>Manabu</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Cui</surname>
            <given-names>Bin</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6348-9454</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Jian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>College of Computer Science and Technology</institution>
            <institution>Dalian University of Technology</institution>
            <addr-line>Number 2, Linggong Road</addr-line>
            <addr-line>Ganjingzi District</addr-line>
            <addr-line>Dalian, Liaoning 116024</addr-line>
            <country>China</country>
            <phone>86 13604119266</phone>
            <email>wangjian@dlut.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4656-7446</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>Hongfei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0872-7688</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Yijia</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5843-4675</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Liang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5557-7515</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Bo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5453-978X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>College of Computer Science and Technology</institution>
        <institution>Dalian University of Technology</institution>
        <addr-line>Dalian</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>College of Information Science and Technology</institution>
        <institution>Dalian Maritime University</institution>
        <addr-line>Dalian</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jian Wang <email>wangjian@dlut.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>9</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>8</issue>
      <elocation-id>e37818</elocation-id>
      <history>
        <date date-type="received">
          <day>17</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>5</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>6</day>
          <month>7</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Bin Cui, Jian Wang, Hongfei Lin, Yijia Zhang, Liang Yang, Bo Xu. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 09.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/8/e37818" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Depression detection has recently received attention in the field of natural language processing. The task aims to detect users with depression based on their historical posts on social media. However, existing studies in this area use the entire historical posts of the users without selecting depression indicator posts. Moreover, these methods fail to effectively extract deep emotional semantic features or simply concatenate emotional representation. To solve this problem, we propose a model to extract deep emotional semantic features and select depression indicator posts based on the emotional states.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop an emotion-based reinforcement attention network for depression detection of users on social media.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The proposed model is composed of 2 components: the emotion extraction network, which is used to capture deep emotional semantic information, and the reinforcement learning (RL) attention network, which is used to select depression indicator posts based on the emotional states. Finally, we concatenate the output of these 2 parts and send them to the classification layer for depression detection.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Experimental results of our model on the multimodal depression data set outperform the state-of-the-art baselines. Specifically, the proposed model achieved accuracy, precision, recall, and F1-score of 90.6%, 91.2%, 89.7%, and 90.4%, respectively.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The proposed model utilizes historical posts of users to effectively identify users’ depression tendencies. The experimental results show that the emotion extraction network and the RL selection layer based on emotional states can effectively improve the accuracy of detection. In addition, the sentence-level attention layer can capture core posts.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>depression detection</kwd>
        <kwd>emotional semantic features</kwd>
        <kwd>social media</kwd>
        <kwd>sentence-level attention</kwd>
        <kwd>emotion-based reinforcement</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>As an important part of medical informatics research, depression is one of the most dangerous diseases impacting human mental health. It is different from usual mood swings and transient emotional reactions. Long-term depression may cause severe problems for the patient, such as suicide. The World Health Organization (WHO) ranks depression as the most significant cause of disability [<xref ref-type="bibr" rid="ref1">1</xref>]. Statistics show that over 300 million people suffer from depression all over the world, and the number of patients continues to grow [<xref ref-type="bibr" rid="ref2">2</xref>]. Depression detection for potential users can help detect the disease at an early stage and help patients get timely treatment.</p>
      <p>The latest global digital report [<xref ref-type="bibr" rid="ref3">3</xref>] shows that there are 4.62 billion social media users worldwide, which is equivalent to 58.4% of the world’s population. Internet users worldwide spend nearly 7 hours a day on the web and 2 hours and 30 minutes on social media. Over the past year, social media users have increased by an average of more than 1 million per day. All these show that social media plays a central role in our daily lives. Meanwhile, an increasing number of people tend to express their emotions and feelings on Weibo, Twitter, etc. People with depression are willing to post depression-related information on social media, such as negative emotions or depression treatment information [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Therefore, we can obtain a great deal of valuable information about depression from their tweets. The objective of this paper is to predict a label {depression, nondepression} for each user indicating their depressive tendencies by mining their historical posts.</p>
      <p>In recent years, psychology-related social media mining has become a research hotspot in natural language processing. The task of detecting users with depression through historical posts on social media has received extensive attention from researchers. Many computer researchers and psychologists have proposed effective methods to detect depression by extracting emotion, interaction, and other features from texts. Nguyen et al [<xref ref-type="bibr" rid="ref6">6</xref>] extracted emotions, psycholinguistic processes, and content themes in posts to detect users with depression. Shen et al [<xref ref-type="bibr" rid="ref7">7</xref>] constructed well-labeled depression data sets on Twitter and extracted 6 feature groups associated with depression. Tong et al [<xref ref-type="bibr" rid="ref8">8</xref>] extracted 3 discriminative features from users’ posts, and then proposed a new cost-sensitive boosting pruning trees model to detect users with depression. Park et al [<xref ref-type="bibr" rid="ref9">9</xref>] concluded that users with depression prefer to express their status on social media than in real life, so extracting emotional information was essential for depression-detection tasks.</p>
      <p>With the maturity of deep learning, the research models have gradually moved from traditional feature engineering to deep learning methods. Yates et al [<xref ref-type="bibr" rid="ref10">10</xref>] utilized a convolutional neural network (CNN)–based model with multiple inputs for detecting users with depression. Alhanai et al [<xref ref-type="bibr" rid="ref11">11</xref>] used long short-term memory network (LSTM) to concatenate text and audio representation to detect users with depression. Ren et al [<xref ref-type="bibr" rid="ref12">12</xref>] extracted emotional information by combining positive words and negative words. Orabi et al [<xref ref-type="bibr" rid="ref13">13</xref>] investigated the performance differences between recurrent neural network (RNN) models and CNN models in depression detection. Zogan et al [<xref ref-type="bibr" rid="ref14">14</xref>] fused semantic and user behavior information for detecting depression, and proposed the multimodal depression detection with hierarchical attention network (MDHAN).</p>
      <p>All these aforementioned deep learning methods use the entire historical posts of the users. However, it is common for users to share various posts online, and posts related to depression are usually rare. The large number of irrelevant posts contained in historical posts can degrade the performance of the model. <xref rid="figure1" ref-type="fig">Figure 1</xref> illustrates this phenomenon, where posts related to depression are highlighted in red, and the irrelevant posts are highlighted in blue.</p>
      <p>From <xref rid="figure1" ref-type="fig">Figure 1</xref>, we can see that only a small percentage of tweets are related to depression. Gui et al [<xref ref-type="bibr" rid="ref15">15</xref>] selected depression indicator posts by reinforcement learning (RL). The advantage of selecting indicator posts is that it excludes the influence of irrelevant posts. If we take all the user’s posts as input, a large amount of noise will be introduced.</p>
      <p>From this example, we can also see that there are many emotional words in the user’s posts such as “depressed”, “suck”, “die”, “nice”. However, current methods are lacking in deep mining of emotional information and do not well integrate emotional information into the model. Motivated by these, we propose an emotion-based reinforcement attention network (ERAN) for depression detection in this paper. The proposed model effectively improves the accuracy of depression detection by extracting deep emotional features, selecting depression indicator posts based on the current emotional states, and capturing core information through the sentence-level attention.</p>
      <p>The main contributions of this paper can be summarized in the following 3 points:</p>
      <list list-type="bullet">
        <list-item>
          <p>First, we extract emotional features by the pretrained TextCNN and fuse the emotional vectors with the output of the attention layer to classify users.</p>
        </list-item>
        <list-item>
          <p>Second, we improve a reinforcement attention network, which is mainly composed of an RL selection layer and a sentence-level attention layer. The RL selection layer can select depression indicator posts based on the emotional states, and the sentence-level attention captures core information by assigning different weights to posts.</p>
        </list-item>
        <list-item>
          <p>Finally, experimental results show that the proposed model outperforms the state-of-the-art baselines on the multimodal depression data set (MDD).</p>
        </list-item>
      </list>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Sample posts of a depressed user. The posts with red highlights are considered depression indicator posts.</p>
        </caption>
        <graphic xlink:href="medinform_v10i8e37818_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Task Definition</title>
        <p>Let <italic>H<sub>i</sub></italic> = {<italic>p</italic><sup>1</sup><sub>i</sub>, <italic>p</italic><sup>2</sup><sub>i</sub>, ..., <italic>p<sup>T</sup><sub>i</sub></italic>} be the set of <italic>T</italic> historical posts of user <italic>u<sub>i</sub></italic>. The goal of the depression detection is to predict a label <inline-graphic xlink:href="medinform_v10i8e37818_fig6.png" xlink:type="simple" mimetype="image"/> to the user <italic>u<sub>i</sub></italic> based on historical posts to indicate whether the user is depressed or not.</p>
      </sec>
      <sec>
        <title>Model Overview</title>
        <p>In the following, we will introduce the structure of our model for depression detection. The proposed model consists of 2 networks, including an emotion extraction network and an RL attention network. The emotion extraction network is used to capture deep emotional sentiment representation from a user’s historical posts. The RL attention network selects depression indicator posts based on the emotional states and assigns weights for the selected posts by the sentence-level attention. Finally, we concatenate the representations captured by the 2 networks and send them to the classification layer to detect whether the user is depressed or not. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows the architecture of the proposed model.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Architecture of the Emotion-Based Reinforcement Attention Network (ERAN). LSTM: long short-term memory network; RL: reinforcement learning.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e37818_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Emotion Extraction Network</title>
        <p>Many studies have shown that emotional information is essential for depression detection on social media. However, current methods fail to extract deep emotional semantic information effectively or do not incorporate the emotional representation well into the model. For instance, some methods just simply concatenate sentiment representation with other information. Motivated by this, we used a pretrained TextCNN [<xref ref-type="bibr" rid="ref16">16</xref>] to extract deep sentiment features and feed them to the RL attention network of the proposed model to accomplish deep interactions. For user <italic>u<sub>i</sub></italic>, we input all posts <italic>p<sup>t</sup><sub>i</sub></italic> into a pretrained TextCNN. The TextCNN has been pretrained on an emotion classification task labeled as positive, negative, and neutral. After training, the TextCNN is used to extract the emotional information of each post. We regard the last hidden layer vector of the TextCNN as emotion vector <inline-graphic xlink:href="medinform_v10i8e37818_fig7.png" xlink:type="simple" mimetype="image"/>. The final emotional semantic representation for all <italic>T</italic>-posts of user <italic>u<sub>i</sub></italic> is defined as <inline-graphic xlink:href="medinform_v10i8e37818_fig8.png" xlink:type="simple" mimetype="image"/>, which is the expectation of <inline-graphic xlink:href="medinform_v10i8e37818_fig7.png" xlink:type="simple" mimetype="image"/>:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i8e37818_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>T</italic> is the number of posts by user <italic>u<sub>i</sub></italic> and <italic>t</italic> is the <italic>t</italic>th post of the user <italic>u<sub>i</sub></italic>.</p>
        <p>Let <inline-graphic xlink:href="medinform_v10i8e37818_fig10.png" xlink:type="simple" mimetype="image"/> denote the representation of a user’s post, with <italic>n</italic> as the length of the padded post. <inline-graphic xlink:href="medinform_v10i8e37818_fig11.png" xlink:type="simple" mimetype="image"/> represents the concatenation operator. We utilize word2vec [<xref ref-type="bibr" rid="ref17">17</xref>] to encode each word <italic>w<sub>i</sub></italic> as a <italic>d</italic>-dimensional word embedding <inline-graphic xlink:href="medinform_v10i8e37818_fig12.png" xlink:type="simple" mimetype="image"/>.</p>
        <p>Then, we input the text sequence <italic>X</italic><sub>1:</sub><italic><sub>n</sub></italic> into a single-layer CNN. The convolutional layer of the CNN has 3 filters <inline-graphic xlink:href="medinform_v10i8e37818_fig13.png" xlink:type="simple" mimetype="image"/>. For each <inline-graphic xlink:href="medinform_v10i8e37818_fig14.png" xlink:type="simple" mimetype="image"/>, there are <italic>Z</italic> filters <italic>F<sub>k</sub></italic> for extracting complementary information. Then, we apply them to a window <inline-graphic xlink:href="medinform_v10i8e37818_fig15.png" xlink:type="simple" mimetype="image"/> to generate a new feature vector. The feature vector <italic>c<sub>k,j</sub></italic> is calculated by:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i8e37818_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>α</italic>(·) denotes a nonlinear activation function; <inline-graphic xlink:href="medinform_v10i8e37818_fig15.png" xlink:type="simple" mimetype="image"/> is a window with <italic>h<sub>k</sub></italic> words, and <inline-graphic xlink:href="medinform_v10i8e37818_fig17.png" xlink:type="simple" mimetype="image"/> is a bias. For each window in the post {<italic>X</italic><sub>1:</sub><italic><sub>h</sub></italic>, <italic>X</italic><sub>2:</sub><italic><sub>h</sub></italic><sub>+1</sub>, …, <italic>X<sub>n</sub></italic><sub>–</sub><italic><sub>h</sub></italic><sub>+1:</sub><italic><sub>n</sub></italic>}, the above actions are taken to get a feature map <inline-graphic xlink:href="medinform_v10i8e37818_fig18.png" xlink:type="simple" mimetype="image"/>, where <inline-graphic xlink:href="medinform_v10i8e37818_fig19.png" xlink:type="simple" mimetype="image"/>, and <italic>h<sub>k</sub></italic> is the height of the convolution kernel.</p>
        <p>After convolution operation, each filter <italic>F<sub>k</sub></italic> creates <italic>Z</italic> feature maps <inline-graphic xlink:href="medinform_v10i8e37818_fig20.png" xlink:type="simple" mimetype="image"/>. Following this, to extract the maximum features, we connect a max-pooling operation [<xref ref-type="bibr" rid="ref18">18</xref>] to all feature maps. The output is calculated as <inline-graphic xlink:href="medinform_v10i8e37818_fig21.png" xlink:type="simple" mimetype="image"/>. The output of max-pooling, which covers all feature maps <inline-graphic xlink:href="medinform_v10i8e37818_fig22.png" xlink:type="simple" mimetype="image"/>, is the concatenation of each <bold><italic>o</italic></bold><italic><sub>k</sub></italic>. Finally, <inline-graphic xlink:href="medinform_v10i8e37818_fig23.png" xlink:type="simple" mimetype="image"/> is entered into a fully connected layer. The output of the classification layer is calculated as:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i8e37818_fig24.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <inline-graphic xlink:href="medinform_v10i8e37818_fig25.png" xlink:type="simple" mimetype="image"/>, and <inline-graphic xlink:href="medinform_v10i8e37818_fig26.png" xlink:type="simple" mimetype="image"/>; <italic>α</italic>(·) is a nonlinear activation function. The fully connected layer is followed by a sigmoid-classification layer with 3 classes, and <italic>σ</italic>(·) represents sigmoid operation.</p>
      </sec>
      <sec>
        <title>RL Attention Network</title>
        <sec>
          <title>Overview</title>
          <p>Users’ historical posts usually contain various content, and only a small fraction may be related to depression. Those irrelevant posts pose a challenge to identify users’ depressive tendencies effectively, so we need to develop a model to select only depression-related posts. The historical posts of the user <italic>u<sub>i</sub></italic> are denoted as <italic>H<sub>i</sub></italic> = {<italic>p</italic><sup>1</sup><italic><sub>i</sub></italic>, <italic>p</italic><sup>2</sup><italic><sub>i</sub></italic>, …, <italic>p<sup>T</sup><sub>i</sub></italic>}, and the depression indicator posts are denoted as <inline-graphic xlink:href="medinform_v10i8e37818_fig27.png" xlink:type="simple" mimetype="image"/>.</p>
          <p>The structure of this network includes (1) a bidirectional LSTM (BiLSTM) that generates contextual representation, (2) an RL selection layer that chooses depression-related posts based on the current emotional states from <italic>H<sub>i</sub></italic>, and (3) a sentence-level attention layer that allows the model to pay more attention to higher-weight posts.</p>
        </sec>
        <sec>
          <title>BiLSTM Layer</title>
          <p>Graves et al [<xref ref-type="bibr" rid="ref19">19</xref>] proposed the BiLSTM, which has been widely used in natural language processing to capture long-distance contextual dependency. Superior to LSTM [<xref ref-type="bibr" rid="ref20">20</xref>], BiLSTM can capture bidirectional semantic dependencies. Inspired by this, we utilized BiLSTM to encode contextual information. The algorithm processes of LSTM are as follows:</p>
          <disp-formula><italic>f<sub>k</sub></italic> = <italic>σ</italic>(<italic>W<sup>f</sup></italic>·[<italic>h<sub>k</sub></italic><sub>–1</sub>, <italic>x<sub>k</sub></italic>] + <italic>b<sup>f</sup></italic>) (<bold>4</bold>)</disp-formula>
          <disp-formula><italic>i<sub>k</sub></italic> = <italic>σ</italic>(<italic>W<sup>i</sup></italic>·[<italic>h<sub>k</sub></italic><sub>–1</sub>, <italic>x<sub>k</sub></italic>] + <italic>b<sup>i</sup></italic>) (<bold>5</bold>)</disp-formula>
          <disp-formula><italic>o<sub>k</sub></italic> = <italic>σ</italic>(<italic>W<sup>o</sup></italic>·[<italic>h<sub>k</sub></italic><sub>–1</sub>, <italic>x<sub>k</sub></italic>] + <italic>b<sup>o</sup></italic>) (<bold>6</bold>)</disp-formula>
          <disp-formula><italic>c<sub>k</sub></italic> = tanh(<italic>W<sup>c</sup></italic>·[<italic>h<sub>k</sub></italic><sub>–1</sub>, <italic>x<sub>k</sub></italic>] + <italic>b<sup>c</sup></italic>) (<bold>7</bold>)</disp-formula>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig28.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig29.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>W<sup>f</sup></italic>, <italic>W<sup>i</sup></italic>, <italic>W<sup>o</sup></italic>, and <italic>W<sup>c</sup></italic> are parameters that can be trained. <inline-graphic xlink:href="medinform_v10i8e37818_fig30.png" xlink:type="simple" mimetype="image"/> represents the element-wise multiplication operation, <italic>x<sub>k</sub></italic> denotes the pretrained word2vec embedding, and <italic>σ</italic>(·) represents sigmoid function.</p>
          <p>Given an input sequence <italic>X</italic> = [<italic>x</italic><sub>1</sub>, <italic>x</italic><sub>2</sub>, ..., <italic>x<sub>n</sub></italic>], the forward hidden state is <inline-graphic xlink:href="medinform_v10i8e37818_fig31.png" xlink:type="simple" mimetype="image"/>, and the backward hidden state is <inline-graphic xlink:href="medinform_v10i8e37818_fig32.png" xlink:type="simple" mimetype="image"/>. The representation of the sentence is:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig33.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>For user <italic>u<sub>i</sub></italic>, the representation of posts is <inline-graphic xlink:href="medinform_v10i8e37818_fig34.png" xlink:type="simple" mimetype="image"/>, where <italic>T</italic> is the number of posts.</p>
        </sec>
        <sec>
          <title>RL Selection Layer</title>
          <p>Because we only have user-level labels, it becomes a key challenge to select posts related to depression. Gui et al [<xref ref-type="bibr" rid="ref15">15</xref>] utilized RL to select depression indicator posts. However, their method still has a high recognition accuracy in the unselected posts, which indicates that this model misses many important posts. Inspired by this, we introduced emotional states to improve the selection strategy based on RL.</p>
          <p>RL is a way of learning by “trial and error” in the environment. It has 3 important factors: agent, environment, and reward, where the agent is the selector. At each step <italic>t</italic>, the agent executes the action <italic>a<sup>t</sup></italic> based on the state <italic>s<sup>t</sup></italic> to select the current post or not. After all posts have been processed, the classifier gives the agent a total reward to evaluate the performance of this policy. Policy gradient [<xref ref-type="bibr" rid="ref21">21</xref>] is an optimization method of parameterizing the policy, which optimizes the parameter <italic>θ</italic> to maximize the total reward. Next, we will explain these parts.</p>
          <p>In this layer, after encoding, the post <italic>p<sup>t</sup></italic> is denoted by the vector <inline-graphic xlink:href="medinform_v10i8e37818_fig35.png" xlink:type="simple" mimetype="image"/>. At each step <italic>t</italic>, the current post is <inline-graphic xlink:href="medinform_v10i8e37818_fig36.png" xlink:type="simple" mimetype="image"/>, the selected posts set is <inline-graphic xlink:href="medinform_v10i8e37818_fig37.png" xlink:type="simple" mimetype="image"/>, and the unselected posts set is <inline-graphic xlink:href="medinform_v10i8e37818_fig38.png" xlink:type="simple" mimetype="image"/>. If action <italic>a<sup>t</sup></italic>=1, the post <inline-graphic xlink:href="medinform_v10i8e37818_fig39.png" xlink:type="simple" mimetype="image"/> is appended to <inline-graphic xlink:href="medinform_v10i8e37818_fig40.png" xlink:type="simple" mimetype="image"/>; otherwise <inline-graphic xlink:href="medinform_v10i8e37818_fig41.png" xlink:type="simple" mimetype="image"/> is appended to <bold><italic>H</italic></bold><sup>non</sup>, where <inline-graphic xlink:href="medinform_v10i8e37818_fig42.png" xlink:type="simple" mimetype="image"/>. The state <italic>s<sup>t</sup></italic> with emotional vector is represented as follows:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig43.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <inline-graphic xlink:href="medinform_v10i8e37818_fig11.png" xlink:type="simple" mimetype="image"/> represents the concatenation operation, and avg(·) represents the average operation. <inline-graphic xlink:href="medinform_v10i8e37818_fig7.png" xlink:type="simple" mimetype="image"/> denotes the emotion vector of the <italic>t</italic>th post of <italic>u<sub>i</sub></italic>. The current state <italic>s<sup>t</sup></italic> incorporates the emotion vector, which enables the agent to take better actions. The action obeys the following policy to take actions:</p>
          <disp-formula>π(<italic>a<sup>t</sup>&#124;s<sup>t</sup></italic>; <italic>θ</italic>) = <italic>p<sub>θ</sub></italic>(<italic>a<sup>t</sup>&#124;s<sup>t</sup></italic>, <italic>θ</italic>) <bold>(12)</bold></disp-formula>
          <p>where <italic>θ</italic> represents the parameter of the policy function and is optimized to maximize the total reward, π(<italic>a<sup>t</sup>&#124;s<sup>t</sup></italic>; <italic>θ</italic>) represents the policy function that the agent follows to take action, and <italic>p<sub>θ</sub></italic>(<italic>a<sup>t</sup>&#124;s<sup>t</sup></italic>, <italic>θ</italic>) is a probability distribution over the actions; we serialize the discrete policy via the <italic>MLP</italic> layer.</p>
          <p>For each episode <italic>τ</italic> = {<italic>s</italic><sup>1</sup>, <italic>a</italic><sup>1</sup>, <italic>s</italic><sup>2</sup>, <italic>a</italic><sup>2</sup>, ..., <italic>s<sup>T</sup></italic>, <italic>a<sup>T</sup></italic>, END} of user <italic>u<sub>i</sub></italic>, the classifier will return a reward after all the selections are made. The objective is to maximize the reward of the episode. The reward is defined as the predicted probability after executing this episode:</p>
          <disp-formula><italic>R</italic>(<italic>τ</italic>) = <italic>p</italic>(<italic>y<sub>i</sub></italic>&#124;<italic>H</italic><sup>dep</sup>; <italic>θ</italic>′) (<bold>13</bold>)</disp-formula>
          <p>where <italic>θ</italic>′ represents the parameters of the classification layer and is optimized by the depression classifier.</p>
          <p>After <italic>N</italic> sampling for user <italic>u<sub>i</sub></italic>, we get <italic>N</italic> episodes <italic>τ</italic> = {<italic>τ</italic><sub>1</sub>, ..., <italic>τ</italic><sub>N</sub>}. To optimize the parameter <italic>θ</italic>, we calculate the expectation of <italic>R</italic>(<italic>τ</italic>). The calculation processes are as follows:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig44.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig45.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Here, because the transfer between states is Markovian, we will use the chain rule to calculate <italic>p</italic>(<italic>τ</italic>&#124;<italic>θ</italic>), as shown in Equation (15).</p>
          <p>To maximize <inline-graphic xlink:href="medinform_v10i8e37818_fig46.png" xlink:type="simple" mimetype="image"/>, we calculate its gradient against <italic>θ</italic>. The equation is shown as follows:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig47.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Here, to simplify the objective function, we assume that the probability of each episode occurring is 1/<italic>N</italic>. In the equation, <inline-graphic xlink:href="medinform_v10i8e37818_fig48.png" xlink:type="simple" mimetype="image"/> is a baseline value. If <italic>R</italic>(<italic>τ<sub>n</sub></italic>) – <italic>b</italic> is positive, the optimization will proceed toward increasing the probability <italic>p</italic>(<italic>a<sup>t</sup></italic>&#124;<italic>s<sup>t</sup></italic>, <italic>θ</italic>). If <italic>R</italic>(<italic>τ<sub>n</sub></italic>) – <italic>b</italic> is negative, the optimization will proceed toward reducing the probability. Thus, <italic>θ</italic> is updated in this way: <inline-graphic xlink:href="medinform_v10i8e37818_fig49.png" xlink:type="simple" mimetype="image"/>, where <italic>α</italic> is the learning rate.</p>
          <p>Finally, the loss function of this part is calculated by:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig50.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Here, maximizing <italic>R</italic>(<italic>τ</italic>) is actually minimizing loss<sub>1</sub>(<italic>θ</italic>). The parameters, as well as the loss, will be optimized by the gradient. After the selection by the agent, <inline-graphic xlink:href="medinform_v10i8e37818_fig51.png" xlink:type="simple" mimetype="image"/> contains the posts related to depression. Then we feed <bold><italic>H</italic></bold><italic><sup>dep</sup></italic> into the attention layer.</p>
        </sec>
        <sec>
          <title>The Sentence-Level Attention Layer</title>
          <p>The semantics of a document can be described by a few sentences in the document. The model will not capture the key information if it treats each sentence fairly. To solve the document classification problem, Yang et al [<xref ref-type="bibr" rid="ref22">22</xref>] designed the hierarchical attention network. This network contains a word-level attention used to focus on keywords and a sentence-level attention used to focus on critical sentences. Inspired by this, we utilized the sentence-level attention mechanism to enable our model to focus on relevant posts. It will create an attention weight for each post in <bold><italic>H</italic></bold><italic><sup>dep</sup></italic>, and the model will focus more on tweets with higher weights.</p>
          <p>We assume that the depression indicator posts set of <italic>u<sub>i</sub></italic> is <inline-graphic xlink:href="medinform_v10i8e37818_fig52.png" xlink:type="simple" mimetype="image"/>, which has <italic>M</italic> indicator posts after padding. For the vector <inline-graphic xlink:href="medinform_v10i8e37818_fig53.png" xlink:type="simple" mimetype="image"/>, the attention weight is calculated by:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig54.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig55.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <disp-formula>
            <graphic xlink:href="medinform_v10i8e37818_fig56.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <inline-graphic xlink:href="medinform_v10i8e37818_fig57.png" xlink:type="simple" mimetype="image"/> is the final posts representation that summarizes all the posts in <bold><italic>H</italic></bold><italic><sub>i</sub><sup>dep</sup></italic>. <inline-graphic xlink:href="medinform_v10i8e37818_fig58.png" xlink:type="simple" mimetype="image"/> is a vector used to measure the weight of the posts and is randomly initialized. During the training process, <inline-graphic xlink:href="medinform_v10i8e37818_fig59.png" xlink:type="simple" mimetype="image"/> can be updated.</p>
        </sec>
      </sec>
      <sec>
        <title>Final Prediction</title>
        <p>In the classifier, we concatenate the output of attention layer <inline-graphic xlink:href="medinform_v10i8e37818_fig57.png" xlink:type="simple" mimetype="image"/> and emotion representation <inline-graphic xlink:href="medinform_v10i8e37818_fig8.png" xlink:type="simple" mimetype="image"/> to form the unified text representation <inline-graphic xlink:href="medinform_v10i8e37818_fig60.png" xlink:type="simple" mimetype="image"/>. Finally, <inline-graphic xlink:href="medinform_v10i8e37818_fig61.png" xlink:type="simple" mimetype="image"/> is projected to the output layer having 2 neurons with a soft-max activation. The categorical cross-entropy loss function and the soft-max probability are calculated as follows:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i8e37818_fig62.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <disp-formula>
          <graphic xlink:href="medinform_v10i8e37818_fig63.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>j</italic> represents the categories, <italic>U</italic> is the total number of users in the data set, <inline-graphic xlink:href="medinform_v10i8e37818_fig64.png" xlink:type="simple" mimetype="image"/> represents the classification probability, and <bold><italic>y</italic></bold><italic><sub>i</sub><sup>j</sup></italic> is the ground truth.</p>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>The data set and methods used in this work are publicly available and do not involve any ethical or moral issues.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Sets</title>
        <p>Shen et al [<xref ref-type="bibr" rid="ref7">7</xref>] proposed the MDD data sets, which contain well-labeled data sets <italic>D</italic><sub>1</sub>, <italic>D</italic><sub>2</sub>, and an unlabeled data set <italic>D</italic><sub>3</sub> on Twitter. These 3 data sets collect posts from users on Twitter at specific times. <xref ref-type="table" rid="table1">Table 1</xref> describes the statistics of these 3 data sets, including the number of users and tweets.</p>
        <list list-type="bullet">
          <list-item>
            <p>Depression data set <italic>D1</italic>: Based on the tweets between 2009 and 2016, if users’ tweets satisfy the strict pattern “(I’m/ I was/ I am/ I’ve been) diagnosed depression,” they will be labeled as depressed.</p>
          </list-item>
          <list-item>
            <p>Nondepressed data set <italic>D2</italic>: In this data set, only users who have never posted tweets containing “depress” are marked as nondepressed.</p>
          </list-item>
          <list-item>
            <p>Depression-candidate data set <italic>D3</italic>: In this data set, users are obtained if their anchor tweets loosely contain “depress.” In this way, <italic>D3</italic> contains more users with depression than randomly sampling.</p>
          </list-item>
        </list>
        <p>In our experiments, we added all the users in <italic>D</italic><sub>1</sub> to the data set. In addition, we randomly selected the same number of users in <italic>D</italic><sub>2</sub> to balance the data set. Selection rules excluded users with fewer than 15 posts, or users with non-English posts. The data set used in this paper contained 2804 Twitter users and over 500,000 posts made by them. Finally, we used 2243/2804 (79.99%) users in the data set to train our model and 561/2804 (20.01%) users to test our model.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Summary of the data sets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Data set</td>
                <td>Label</td>
                <td>User</td>
                <td>Tweets</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>
                  <italic>D</italic>
                  <sub>1</sub>
                </td>
                <td>Depressed</td>
                <td>1402</td>
                <td>292,564</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>D</italic>
                  <sub>2</sub>
                </td>
                <td>Nondepressed</td>
                <td>&#62;300 million</td>
                <td>&#62;10 million</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>D</italic>
                  <sub>3</sub>
                </td>
                <td>Nonlabeled</td>
                <td>36,993</td>
                <td>35,076,677</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <p>In the experimental phase, we used accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score to evaluate the performance of the proposed model. <italic>F</italic><sub>1</sub>-score is calculated as follows:</p>
        <disp-formula><italic>F</italic><sub>1</sub> = (2·<italic>P</italic>·<italic>R</italic>)/(<italic>P</italic> + <italic>R</italic>) (<bold>23</bold>)</disp-formula>
        <p>where <italic>R</italic> = <italic>TP</italic>/(<italic>TP</italic>+<italic>FN</italic>) and <italic>P</italic> = <italic>TP</italic>/(<italic>TP</italic>+<italic>FP</italic>); here, <italic>P</italic> is precision, <italic>R</italic> represents recall, <italic>TP</italic> represents true-positive prediction, <italic>FN</italic> is false-negative prediction, and <italic>FP</italic> is false-positive prediction.</p>
      </sec>
      <sec>
        <title>Experimental Setting</title>
        <p>During the experimental phase, the hyperparameters were randomly initialized based on our experience. The pretrained word2vec is used to initialize the word embeddings. The Adam optimizer [<xref ref-type="bibr" rid="ref23">23</xref>] was used to optimize the hyperparameters. Other hyperparameter settings are shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <p>The training of our ERAN model is based on the operating system of Ubuntu 18.04, using PyTorch version 1.9.0 and Python version 3.7.0. The graphics processing unit is NVIDIA TITAN Xp with 12-GB memory.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Values of hyperparameters.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Hyperparameters</td>
                <td>Value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Word embedding dimension</td>
                <td>300</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM<sup>a</sup> hidden units</td>
                <td>200</td>
              </tr>
              <tr valign="top">
                <td>Dropout rate</td>
                <td>0.5</td>
              </tr>
              <tr valign="top">
                <td>Batch size</td>
                <td>128</td>
              </tr>
              <tr valign="top">
                <td>Learning rate</td>
                <td>0.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>BiLSTM: bidirectional long short-term memory network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Comparison With Existing Methods</title>
        <p>Here, we describe the baseline methods that we compared with.</p>
        <list list-type="bullet">
          <list-item>
            <p>Naïve Bayesian (NB): NB [<xref ref-type="bibr" rid="ref24">24</xref>] is widely used in classification tasks. The classifier accepts all features to detect the user’s depressive tendencies.</p>
          </list-item>
          <list-item>
            <p>Wasserstein Dictionary Learning (WDL): Rolet et al [<xref ref-type="bibr" rid="ref25">25</xref>] proposed the WDL. It considers the Wasserstein distance as the fitting error to leverage the similarity shared by the features.</p>
          </list-item>
          <list-item>
            <p>Multiple Social Networking Learning (MSNL): Song et al [<xref ref-type="bibr" rid="ref26">26</xref>] proposed the MSNL model to solve the volunteerism tendency prediction problem.</p>
          </list-item>
          <list-item>
            <p>Multimodal Depressive Dictionary Learning (MDL): Shen et al [<xref ref-type="bibr" rid="ref7">7</xref>] proposed the MDL model by combining the multimodal strategy and dictionary learning strategy.</p>
          </list-item>
          <list-item>
            <p>CNN/LSTM + RL: Gui et al [<xref ref-type="bibr" rid="ref15">15</xref>] proposed an RL model to select depression indicator posts.</p>
          </list-item>
          <list-item>
            <p>MDHAN: Zogan et al [<xref ref-type="bibr" rid="ref14">14</xref>] proposed MDHAN. They extracted semantic information using a hierarchical attention network and user behavior by a multimodal encoder.</p>
          </list-item>
        </list>
        <p>We compared the performance of the proposed model (ERAN) with other existing models on the MDD data set. The experimental results are shown in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <p>From the first 4 classic methods, MDL achieves the best performance with 78.6% in <italic>F</italic><sub>1</sub>-score, indicating the validity of the multimodal depressive dictionary. The results based on BiLSTM are better than those based on LSTM, indicating that the bidirectional encoder can capture more helpful information. Similarly, the performances based on BiLSTM (Att) are better than those based on BiLSTM, which can demonstrate that the sentence-level attention mechanism can capture more important depression information.</p>
        <p>With the popularity of pretrained approaches, we experimented with 2 pretrained models, Bidirectional Encoder Representations from Transformers (BERT) and Robustly Optimized BERT pre-training Approach (RoBERTa) [<xref ref-type="bibr" rid="ref27">27</xref>], and fine-tuned them on our data set. From <xref ref-type="table" rid="table3">Table 3</xref>, we can see that the simple pretraining models do not work very well, which may be due to the sparse distribution of depression-related words causing the pretrained models to fail to maximize their ability.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Results compared with the baseline models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Accuracy</td>
                <td>Precision</td>
                <td>Recall</td>
                <td><italic>F</italic><sub>1</sub>-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>NB<sup>a</sup> [<xref ref-type="bibr" rid="ref22">22</xref>]</td>
                <td>0.636</td>
                <td>0.724</td>
                <td>0.623</td>
                <td>0.588</td>
              </tr>
              <tr valign="top">
                <td>WDL<sup>b</sup> [<xref ref-type="bibr" rid="ref24">24</xref>]</td>
                <td>0.761</td>
                <td>0.763</td>
                <td>0.762</td>
                <td>0.762</td>
              </tr>
              <tr valign="top">
                <td>MSNL<sup>c</sup> [<xref ref-type="bibr" rid="ref25">25</xref>]</td>
                <td>0.782</td>
                <td>0.781</td>
                <td>0.781</td>
                <td>0.781</td>
              </tr>
              <tr valign="top">
                <td>MDL<sup>d</sup> [<xref ref-type="bibr" rid="ref6">6</xref>]</td>
                <td>0.790</td>
                <td>0.786</td>
                <td>0.786</td>
                <td>0.786</td>
              </tr>
              <tr valign="top">
                <td>LSTM<sup>e</sup></td>
                <td>0.797</td>
                <td>0.812</td>
                <td>0.813</td>
                <td>0.812</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM<sup>f</sup></td>
                <td>0.805</td>
                <td>0.817</td>
                <td>0.818</td>
                <td>0.817</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM (Att<sup>g</sup>)</td>
                <td>0.817</td>
                <td>0.828</td>
                <td>0.828</td>
                <td>0.828</td>
              </tr>
              <tr valign="top">
                <td>BERT<sup>h</sup> (base) [<xref ref-type="bibr" rid="ref27">27</xref>]</td>
                <td>0.845</td>
                <td>0.883</td>
                <td>0.825</td>
                <td>0.853</td>
              </tr>
              <tr valign="top">
                <td>RoBERTa<sup>i</sup> (base) [<xref ref-type="bibr" rid="ref27">27</xref>]</td>
                <td>0.851</td>
                <td>0.902</td>
                <td>0.837</td>
                <td>0.868</td>
              </tr>
              <tr valign="top">
                <td>CNN<sup>j</sup> + RL<sup>k</sup> [<xref ref-type="bibr" rid="ref14">14</xref>]</td>
                <td>0.871</td>
                <td>0.871</td>
                <td>0.871</td>
                <td>0.871</td>
              </tr>
              <tr valign="top">
                <td>LSTM + RL [<xref ref-type="bibr" rid="ref14">14</xref>]</td>
                <td>0.870</td>
                <td>0.872</td>
                <td>0.870</td>
                <td>0.871</td>
              </tr>
              <tr valign="top">
                <td>MDHAN<sup>l</sup> [<xref ref-type="bibr" rid="ref13">13</xref>]</td>
                <td>0.895</td>
                <td>0.902</td>
                <td>0.892</td>
                <td>0.893</td>
              </tr>
              <tr valign="top">
                <td>ERAN<sup>m</sup> (ours)</td>
                <td>0.906</td>
                <td>0.912</td>
                <td>0.897</td>
                <td>0.904</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>NB: naïve Bayesian.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>WDL: Wasserstein Dictionary Learning.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>MSNL: Multiple Social Networking Learning.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>MDL: Multimodal Depressive Dictionary Learning.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>LSTM: long short-term memory network.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>BiLSTM: bidirectional long short-term memory network.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>Att: attention.</p>
            </fn>
            <fn id="table3fn8">
              <p><sup>h</sup>BERT: Bidirectional Encoder Representation from Transformers.</p>
            </fn>
            <fn id="table3fn9">
              <p><sup>i</sup>RoBERTa: Robustly Optimized BERT pre-training Approach.</p>
            </fn>
            <fn id="table3fn10">
              <p><sup>j</sup>CNN: convolutional neural network.</p>
            </fn>
            <fn id="table3fn11">
              <p><sup>k</sup>RL: reinforcement learning.</p>
            </fn>
            <fn id="table3fn12">
              <p><sup>l</sup>MDHAN: multimodal depression detection with hierarchical attention network.</p>
            </fn>
            <fn id="table3fn13">
              <p><sup>m</sup>ERAN: emotion-based reinforcement attention network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The CNN/LSTM + RL models use RL to select indicator posts, which verifies the validity of the selection strategy. The MDHAN model proves that the multimodal features are also important by fusing semantic information with user behavior information.</p>
        <p>The proposed ERAN model achieves optimal results because we fused emotional information and selected depression indicator posts based on emotional states. In addition, the sentence-level attention can capture core posts.</p>
      </sec>
      <sec>
        <title>Ablation Study</title>
        <p>Ablation experiments were conducted to validate the necessity of the emotion extraction network, the RL selection layer, and the sentence-level attention. The study is performed by removing one module at a time. The results of the ablation experiments are presented in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        <p>Emotion-based BiLSTM attention network (EBAtt) is the model that removes the RL selection layer from the proposed model and uses all user posts. Reinforcement learning attention network (RLAtt) is the model that removes the emotion extraction network. Emotion-based reinforcement learning network (ERN) is the model that substitutes the sentence-level attention with the averaging operation. We can see that the ERAN model proposed in this paper performs best. Although ERAN is lower than ERN in precision, it is higher in the other 3 metrics. The sentence-level attention can improve the performance, demonstrating that it can capture more important posts.</p>
        <p>EBAtt extracts semantic information on all posts by BiLSTM and fuses it with emotional representation. Results show that the <italic>F</italic><sub>1</sub>-score of EBAtt decreases by 2.9% compared with the proposed model, which indicates the necessity of selecting depression indicator posts.</p>
        <p>RLAtt is the model after removing the emotion extraction network from ERAN. Similarly, the state of the RL selection layer does not contain the emotion vector. The <italic>F</italic><sub>1</sub>-score of RLAtt is lower than the proposed model by 3.1%, which indicates that the emotional information improves our model the most.</p>
        <p>From the results, we can conclude that extracting emotional information through the pretrained TextCNN is beneficial for depression detection task. Selecting depression indicator posts based on emotional states is also necessary for depression detection. In addition, the sentence-level attention layer can focus on useful posts.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Results of ablation experiments. Emotion-Based Reinforcement Attention Network (ERAN) is the proposed model, and the remaining three are the models after removing one module of ERAN. Acc: accuracy; EBAtt: emotion-based BiLSTM (bidirectional long short-term memory network) attention network; ERN: emotion-based reinforcement learning network; F1: <italic>F</italic><sub>1</sub>-score; P: precision; R: recall; RLAtt: reinforcement learning attention network.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e37818_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>The Effectiveness of the RL Selection Layer</title>
        <p>We train the proposed model to generate 2 subsets of depression-related and unselected posts from the original data set. Following this, we obtain 3 data sets, the selected indicator data set <italic>H<sup>dep</sup></italic>, the unselected data set <italic>H<sup>non</sup></italic>, and the original data set <italic>H<sup>orig</sup></italic>. The baseline model BiLSTM is then trained on each of these 3 data sets to verify the effectiveness of the RL selection layer. <xref rid="figure4" ref-type="fig">Figure 4</xref> illustrates the results of the baseline model BiLSTM on the 3 data sets.</p>
        <p>From <xref rid="figure4" ref-type="fig">Figure 4</xref>, we can conclude that the model trained on <italic>H<sup>dep</sup></italic> performs best. Meanwhile, the model trained on <italic>H<sup>non</sup></italic> achieves worse performance than the one trained on <italic>H<sup>orig</sup></italic>, which demonstrates the effectiveness of the RL selection.</p>
        <p>To verify the effectiveness of introducing sentiment vectors in the RL selection module, we removed the sentiment vector <inline-graphic xlink:href="medinform_v10i8e37818_fig7.png" xlink:type="simple" mimetype="image"/> in the state <italic>s<sup>t</sup></italic>. The ablation experiment achieves 88.3%, 88.1%, 87.3%, and 87.7% in accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-scores, respectively. Through the results of the ablation experiment, we can find that the performance of the model decreases after removing the sentiment vectors from the RL selection module, which proves that the sentiment information is helpful for selecting depression indicator posts.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Comparative results of BiLSTM trained on the selected posts, the unselected posts, and the original posts. Acc: accuracy; BiLSTM: bidirectional long short-term memory network; F1: <italic>F</italic><sub>1</sub>-score; P: precision; R: recall.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e37818_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Attention Visualization and Error Analysis</title>
        <p>In this section, we extracted attention weights and visualized them to verify the validity of the sentence-level attention layer and the reasonableness of the selected posts. We have selected a part of the results of the users as examples, who are called “___mandyy” and “Adri.” The results of attention visualization are illustrated in <xref rid="figure5" ref-type="fig">Figure 5</xref>.</p>
        <p>The first example shows that the first post has the highest weight, where “my depression” indicates that the user has depression. The second post also contains the words “depression”, “me”, etc. Thus, “___mandyy” is finally classified as having “depression.” As we can see, many of the selected posts of this user with depression are of negative sentiment, suggesting a strong association between depression and negative emotions.</p>
        <p>The second user is the one we have used as an example in <xref rid="figure1" ref-type="fig">Figure 1</xref>. From the results of the visualization, we can observe that the fifth post has the highest weight. Classification results indicate that the user is indeed depressed. However, the posts “The view’s really nice from here.” and “I’m so proud of bts they deserve everything” are irrelevant to depression. In addition, the model assigns high weight to the first irrelevant post. One possible reason for choosing these posts is that they contain strong emotional expressions. We think it can be improved by developing a stricter selection strategy.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Examples of attention visualization. Different colors represent different weights. The deeper the color, the greater the weight of the post.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e37818_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Based on the results, we can observe that introducing emotional information can be very helpful for depression detection tasks, indicating that emotional characteristics are strongly associated with depression. The strategy of selecting depression indicator posts from historical posts is critical to our model because it excludes the effect of irrelevant information. As only user-level labels are in the data set, we use RL to select posts rather than supervised learning. Furthermore, the fusion of emotion vectors into agent states is interpretable. The sentence-level attention layer assigns greater weight to relevant posts, which makes the model perform better.</p>
        <p>Although the RL selection layer performs well, the selected posts still contain irrelevant posts with strong emotional expressions. Compared with other optimization methods, the convergence of policy gradient is better. However, this method tends to fall into local optimum and its training speed is slow.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this paper, we addressed the task of depression detection of users on social media by proposing an ERAN. The proposed model contains 2 modules: the emotion extraction network and the RL attention network. It uses the pretrained word2vec embeddings as input. The emotion extraction network captures deep emotional information by a pretrained TextCNN. The RL attention network is composed of the BiLSTM layer, the RL selection layer, and the sentence-level attention layer. The RL selection layer can select depression indicator posts from original posts based on the emotional states, and the attention layer is able to assign greater weight to relevant posts. Results show that the proposed model outperforms the state-of-the-art model. We verified the validity of the emotion extraction network, the RL selection layer, and the sentence-level attention layer through an ablation study and a visualization analysis. The emotional features and selection of indicator posts are necessary for the depression detection task.</p>
        <p>The proposed model uses a social media data set to detect depression, which can provide a certain degree of diagnostic basis and address the problem of the lack of effective objective diagnosis in the field of depression. In future work, we will introduce users’ personality information and multimodal information such as visual information to our model. We will further extract more detailed information about depression based on the proposed model to help analyze the pathogenesis of depression as well as accurate treatment.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BiLSTM</term>
          <def>
            <p>bidirectional long short-term memory network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EBAtt</term>
          <def>
            <p>emotion-based BiLSTM attention network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ERAN</term>
          <def>
            <p>emotion-based reinforcement attention network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ERN</term>
          <def>
            <p>emotion-based reinforcement learning network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LSTM</term>
          <def>
            <p>long short-term memory network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MDHAN</term>
          <def>
            <p>multimodal depression detection with hierarchical attention network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">MDD</term>
          <def>
            <p>multimodal depression data set</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">MDL</term>
          <def>
            <p>Multimodal Depressive Dictionary Learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MSNL</term>
          <def>
            <p>Multiple Social Networking Learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NB</term>
          <def>
            <p>naïve Bayesian</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">RL</term>
          <def>
            <p>reinforcement learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">RLAtt</term>
          <def>
            <p>reinforcement learning attention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">RoBERTa</term>
          <def>
            <p>Robustly Optimized BERT pre-training Approach</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">WDL</term>
          <def>
            <p>Wasserstein Dictionary Learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The publication of this paper is funded by grants from the Natural Science Foundation of China (No. 62006034), Natural Science Foundation of Liaoning Province (No. 2021-BS-067), the Fundamental Research Funds for the Central Universities [No. DUT21RC(3)015], and the major science and technology projects of Yunnan Province (202002ab080001-1).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>BC performed the experiments and wrote the paper. JW and YZ provided theoretical guidance and the revision of this paper. HL, LY, and BX contributed to the algorithm design.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Depression and other common mental disorders: global health estimates</article-title>
          <source>World Health Organization</source>
          <year>2017</year>
          <access-date>2022-07-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.who.int/iris/bitstream/handle/10665/254610/WHO-MSD-MER-2017.2-eng.pdf">https://apps.who.int/iris/bitstream/handle/10665/254610/WHO-MSD-MER-2017.2-eng.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yadollahpour</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nasrollahi</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Quantitative Electroencephalography for Objective and Differential Diagnosis of Depression: A Comprehensive Review</article-title>
          <source>GJHS</source>
          <year>2016</year>
          <month>03</month>
          <day>31</day>
          <volume>8</volume>
          <issue>11</issue>
          <fpage>249</fpage>
          <lpage>256</lpage>
          <pub-id pub-id-type="doi">10.5539/gjhs.v8n11p249</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <article-title>Digital 2022: global overview report</article-title>
          <source>DataReportal</source>
          <year>2022</year>
          <access-date>2022-07-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://datareportal.com/reports/digital-2022-global-overview-report">https://datareportal.com/reports/digital-2022-global-overview-report</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cha</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cha</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Depressive moods of users portrayed in Twitter</article-title>
          <source>KDD '12: Proceedings of the 18th ACM SIGKDD international conference on Knowledge discovery and data mining</source>
          <year>2012</year>
          <conf-name>KDD '12: The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 12-16, 2012</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>ACM</publisher-name>
          <fpage>12</fpage>
          <lpage>16</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choudhury</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Counts</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Horvitz</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Predicting postpartum changes in emotion and behavior via social media</article-title>
          <source>CHI '13: Proceedings of the SIGCHI Conference on Human Factors in Computing Systems</source>
          <year>2013</year>
          <month>4</month>
          <conf-name>CHI '13: CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>April 27, 2013 to May 2, 2013</conf-date>
          <conf-loc>Paris, France</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>ACM</publisher-name>
          <fpage>3267</fpage>
          <lpage>3276</lpage>
          <pub-id pub-id-type="doi">10.1145/2470654.2466447</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Phung</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatesh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Berk</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Affective and Content Analysis of Online Depression Communities</article-title>
          <source>IEEE Trans. Affective Comput</source>
          <year>2014</year>
          <month>7</month>
          <day>1</day>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>217</fpage>
          <lpage>226</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1109/TAFFC.2014.2315623"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/taffc.2014.2315623</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chua</surname>
              <given-names>T-S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Depression detection via harvesting social media: a multimodal dictionary learning solution</article-title>
          <source>Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence</source>
          <year>2017</year>
          <conf-name>Twenty-Sixth International Joint Conference on Artificial Intelligence</conf-name>
          <conf-date>August 19-25, 2017</conf-date>
          <conf-loc>Melbourne, VIC, Australia</conf-loc>
          <publisher-loc>Palo Alto, CA</publisher-loc>
          <publisher-name>AAAI Press</publisher-name>
          <fpage>3838</fpage>
          <lpage>3844</lpage>
          <pub-id pub-id-type="doi">10.24963/ijcai.2017/536</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Sadka</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Cost-sensitive Boosting Pruning Trees for depression detection on Twitter</article-title>
          <source>IEEE Trans. Affective Comput</source>
          <year>2022</year>
          <pub-id pub-id-type="doi">10.1109/taffc.2022.3145634</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cha</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Perception differences between the depressed and non-depressed users in Twitter</article-title>
          <source>Proceedings of the Seventh International AAAI Conference on Weblogs and Social Media</source>
          <year>2013</year>
          <conf-name>Seventh International AAAI Conference on Weblogs and Social Media (ICWSM-13)</conf-name>
          <conf-date>July 8-11, 2013</conf-date>
          <conf-loc>Cambridge, MA</conf-loc>
          <fpage>476</fpage>
          <lpage>485</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ojs.aaai.org/index.php/ICWSM/article/view/14425/14274"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yates</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goharian</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Depression and self-harm risk assessment in online forums</article-title>
          <source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2017</year>
          <conf-name>2017 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>September 7-11, 2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <publisher-loc>Copenhagen, Denmark</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>2968</fpage>
          <lpage>2978</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d17-1322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhanai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Glass</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Detecting Depression with Audio/Text Sequence Modeling of Interviews</article-title>
          <year>2018</year>
          <conf-name>Proceedings of the INTERSPEECH 2018</conf-name>
          <conf-date>September 2-6, 2018</conf-date>
          <conf-loc>Hyderabad, Telangana, India</conf-loc>
          <fpage>1716</fpage>
          <lpage>1720</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://groups.csail.mit.edu/sls/publications/2018/Alhanai_Interspeech-2018.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/Interspeech.2018-2522</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Depression Detection on Reddit With an Emotion-Based Attention Network: Algorithm Development and Validation</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>07</month>
          <day>16</day>
          <volume>9</volume>
          <issue>7</issue>
          <fpage>e28754</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/7/e28754/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/28754</pub-id>
          <pub-id pub-id-type="medline">34269683</pub-id>
          <pub-id pub-id-type="pii">v9i7e28754</pub-id>
          <pub-id pub-id-type="pmcid">PMC8325087</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Orabi</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Buddhitha</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Orabi</surname>
              <given-names>MH</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for depression detection of twitter users</article-title>
          <source>Proceedings of the Fifth Workshop on Computational Linguistics and Clinical Psychology: From Keyboard to Clinic</source>
          <year>2018</year>
          <month>6</month>
          <conf-name>Fifth Workshop on Computational Linguistics and Clinical Psychology: From Keyboard to Clinic</conf-name>
          <conf-date>June 5, 2018</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics (ACL)</publisher-name>
          <fpage>88</fpage>
          <lpage>97</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/W18-06.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/W18-06</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zogan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Razzak</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jameel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Explainable depression detection with multi-aspect features using a hybrid deep learning model on social media</article-title>
          <source>World Wide Web</source>
          <year>2022</year>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>281</fpage>
          <lpage>304</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35106059"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11280-021-00992-2</pub-id>
          <pub-id pub-id-type="medline">35106059</pub-id>
          <pub-id pub-id-type="pii">992</pub-id>
          <pub-id pub-id-type="pmcid">PMC8795347</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gui</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Depression Detection on Social Media with Reinforcement Learning</article-title>
          <source>Chinese Computational Linguistics: 18th China National Conference, CCL 2019, Kunming, China, October 18–20, 2019, Proceedings</source>
          <year>2019</year>
          <month>10</month>
          <conf-name>China National Conference on Chinese Computational Linguistics</conf-name>
          <conf-date>October 18, 2019</conf-date>
          <conf-loc>Kunming, China</conf-loc>
          <publisher-loc>Berlin/Heidelberg, Germany</publisher-loc>
          <publisher-name>Springer-Verlag</publisher-name>
          <fpage>613</fpage>
          <lpage>624</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-32381-3_49</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Convolutional Neural Networks for Sentence Classification</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <year>2014</year>
          <conf-name>Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics (ACL)</publisher-name>
          <fpage>1746</fpage>
          <lpage>1751</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D14-1181"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>NIPS'13: Proceedings of the 26th International Conference on Neural Information Processing Systems - Volume 2</source>
          <year>2013</year>
          <conf-name>26th International Conference on Neural Information Processing Systems (NIPS'13)</conf-name>
          <conf-date>December 5-10, 2013</conf-date>
          <conf-loc>Lake Tahoe, NV</conf-loc>
          <publisher-loc>Red Hook, NY</publisher-loc>
          <publisher-name>Curran Associates Inc</publisher-name>
          <fpage>3111</fpage>
          <lpage>3119</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Collobert</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bottou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Karlen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kavukcuoglu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kuksa</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing (almost) from scratch</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2011</year>
          <month>11</month>
          <day>1</day>
          <volume>12</volume>
          <issue>2011</issue>
          <fpage>2493</fpage>
          <lpage>2537</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume12/collobert11a/collobert11a.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.5555/1953048.2078186</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jaitly</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mohamed</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Hybrid speech recognition with deep bidirectional LSTM</article-title>
          <source>Proceedings of the 2013 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)</source>
          <year>2013</year>
          <conf-name>2013 IEEE Workshop on Automatic Speech Recognition and Understanding</conf-name>
          <conf-date>December 8-13, 2013</conf-date>
          <conf-loc>Olomouc, Czech Republic</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>IEEE</publisher-name>
          <fpage>273</fpage>
          <lpage>278</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1109/ASRU.2013.6707742"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/asru.2013.6707742</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Computation</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>1780</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1162/neco.1997.9.8.1735"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>McAllester</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Mansour</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Policy gradient methods for reinforcement learning with function approximation</article-title>
          <source>NIPS'99: Proceedings of the 12th International Conference on Neural Information Processing Systems</source>
          <year>1999</year>
          <month>11</month>
          <conf-name>12th International Conference on Neural Information Processing Systems (NIPS'99)</conf-name>
          <conf-date>November 29 to December 4, 1999</conf-date>
          <conf-loc>Denver, CO</conf-loc>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
          <fpage>1057</fpage>
          <lpage>1063</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.5555/3009657.3009806</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dyer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Smola</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hovy</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Hierarchical attention networks for document classification</article-title>
          <source>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2016</year>
          <month>6</month>
          <conf-name>2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL HLT 2016)</conf-name>
          <conf-date>June 12-17, 2016</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics (ACL)</publisher-name>
          <fpage>1480</fpage>
          <lpage>1489</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N16-1174"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/n16-1174</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingma</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Ba</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Adam: A method for stochastic optimization</article-title>
          <source>arXiv.</source>
          <comment>Preprint posted online December 22, 2014<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1412.6980v9"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>É</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: Machine learning in Python</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2011</year>
          <volume>12</volume>
          <issue>2011</issue>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rolet</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cuturi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Peyré</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Fast dictionary learning with a smoothed Wasserstein loss</article-title>
          <source>PMLR</source>
          <year>2016</year>
          <volume>51</volume>
          <fpage>630</fpage>
          <lpage>638</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://proceedings.mlr.press/v51/rolet16.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/inmic.2016.7840071</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Nie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Multiple social network learning and its application in volunteerism tendency prediction</article-title>
          <source>Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
          <year>2015</year>
          <conf-name>SIGIR '15: The 38th International ACM SIGIR Conference on Research and Development in Information Retrieval</conf-name>
          <conf-date>August 9-13, 2015</conf-date>
          <conf-loc>Santiago, Chile</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>9</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1145/2766462.2767726</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: A robustly optimized BERT pretraining approach</article-title>
          <source>arXiv.</source>
          <comment>Preprint posted online July 26, 2019<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692"/></comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
