<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i7e28227</article-id>
      <article-id pub-id-type="pmid">34255687</article-id>
      <article-id pub-id-type="doi">10.2196/28227</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Multifeature Fusion Attention Network for Suicide Risk Assessment Based on Social Media: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sun</surname>
            <given-names>Chengjie</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wang</surname>
            <given-names>Suge</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Jiacheng</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0224-9139</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Shaowu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0796-2750</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Yijia</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>College of Computer Science and Technology</institution>
            <institution>Dalian University of Technology</institution>
            <addr-line>No 2 Linggong Road</addr-line>
            <addr-line>Ganjingzi District</addr-line>
            <addr-line>Dalian, 116023</addr-line>
            <country>China</country>
            <phone>86 13384118909</phone>
            <email>zhangyijia1979@gmail.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5843-4675</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>Hongfei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0872-7688</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Jian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4656-7446</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>College of Computer Science and Technology</institution>
        <institution>Dalian University of Technology</institution>
        <addr-line>Dalian</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Yijia Zhang <email>zhangyijia1979@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>9</day>
        <month>7</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>7</issue>
      <elocation-id>e28227</elocation-id>
      <history>
        <date date-type="received">
          <day>25</day>
          <month>2</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>19</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>30</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>5</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Jiacheng Li, Shaowu Zhang, Yijia Zhang, Hongfei Lin, Jian Wang. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 09.07.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/7/e28227" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Suicide has become the fifth leading cause of death worldwide. With development of the internet, social media has become an imperative source for studying psychological illnesses such as depression and suicide. Many methods have been proposed for suicide risk assessment. However, most of the existing methods cannot grasp the key information of the text. To solve this problem, we propose an efficient method to extract the core information from social media posts for suicide risk assessment.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We developed a multifeature fusion recurrent attention model for suicide risk assessment.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used the bidirectional long short-term memory network to create the text representation with context information from social media posts. We further introduced a self-attention mechanism to extract the core information. We then fused linguistic features to improve our model.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We evaluated our model on the dataset delivered by the Computational Linguistics and Clinical Psychology 2019 shared task. The experimental results showed that our model improves the risk-F1, urgency-F1, and existence-F1 by 3.3%, 0.9%, and 3.7%, respectively.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We found that bidirectional long short-term memory performs well for long text representation, and the attention mechanism can identify the key information in the text. The external features can complete the semantic information lost by the neural network during feature extraction and further improve the performance of the model. The experimental results showed that our model performs better than the state-of-the-art method. Our work has theoretical and practical value for suicidal risk assessment.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>suicide risk assessment</kwd>
        <kwd>social media</kwd>
        <kwd>infodemiology</kwd>
        <kwd>attention mechanism</kwd>
        <kwd>neural networks</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The World Health Organization’s statistical report showed that millions of people choose to commit suicide every year, and even more people are preparing to implement suicide. In 2016, 21.2 in 100,000 people chose to commit suicide worldwide. Moreover, approximately 300,000 people commit suicide in China every year, and the number of suicide attempts is close to 200,000. Suicide has become the fifth leading cause of death worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. The traditional suicide risk assessment method is only dependent on the diagnosis of psychologists, which has great deficiencies with respect to inefficiency and coverage. With development of the internet, social media platforms such as Twitter, Sina Weibo, and WeChat Moments have developed rapidly in recent years. Social media has gradually become an integral part of our lives. People communicate with each other through social media, and use it as a platform to express their emotions and share their opinions, including suicidal social media posters who use these platforms to express their feelings. It is estimated that 68% of the people who use social media are 10 to 30 years old. Since the high-risk population for suicide is concentrated in the age group of 15 to 29 years, there is considerable overlap between these cohorts [<xref ref-type="bibr" rid="ref2">2</xref>]. This means that social media is an important data source for studying psychological illnesses such as depression and suicide.</p>
      <p>In recent years, text mining based on social media and its psychologically related submedia has become a hot topic in computational linguistics, which provides new research methods for social media–oriented suicide risk assessment. Many scholars have assessed suicide risk by extracting psychological features from texts. For example, Huang et al [<xref ref-type="bibr" rid="ref3">3</xref>] proposed a method to detect the suicide risk of social media users by identifying mental vocabulary. Zhang et al [<xref ref-type="bibr" rid="ref4">4</xref>] proposed a method of using linguistic features to assess suicide risk. However, this method has poor detection accuracy and generalization ability, leading to the development of machine learning–based approaches to tackle the task of suicide risk assessment. Kumar et al [<xref ref-type="bibr" rid="ref5">5</xref>] analyzed the posting activities of posters on the SuicideWatch subreddit that followed celebrity suicide news. They proposed a suicide risk assessment method based on the Werther effect and latent Dirichlet allocation [<xref ref-type="bibr" rid="ref6">6</xref>] model. De Choudhury et al [<xref ref-type="bibr" rid="ref7">7</xref>] analyzed the transition process of user tweets from mental health content to suicide content. They proposed a statistical method based on propensity score matching to detect the user’s suicidal intent. Bittar et al [<xref ref-type="bibr" rid="ref8">8</xref>] proposed a method to detect suicide risk using machine learning for electronic health records. Ji et al [<xref ref-type="bibr" rid="ref9">9</xref>] proposed a new data protection scheme and average difference reduction optimization strategy (AvgDiffLDP) to improve the machine learning model. In addition to machine learning–based methods, deep learning–based methods also have shown good performance in text classification. 
Shing et al [<xref ref-type="bibr" rid="ref10">10</xref>] proposed a convolutional neural network (CNN) fused with external dictionary features to detect suicide risk. Mohammadi et al [<xref ref-type="bibr" rid="ref11">11</xref>] proposed a multichannel classification model including a CNN and recurrent neural network (RNN).</p>
      <p>It is necessary to judge the text from different angles when assessing the suicide risk of posts. However, it is difficult for a single model to fully capture the semantic information of the text. Therefore, inspired by previous work [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], we here propose a multifeature fusion recurrent attention model for the social media–oriented suicidal risk assessment task. The attention model is used to capture the semantic information in the text and merge it with other external features to better assess the effect.</p>
      <p>The main contributions of this paper are divided into the following aspects. First, we propose a recurrent attention model. Using this model to represent the text can extract the core semantic information of the text. We further introduce a distribution loss function to reduce the impact of uneven data distribution.</p>
      <p>Second, we fuse external features based on neural networks. These external features are valuable in suicide risk assessment and can further improve the performance of our model.</p>
      <p>Finally, experimental results showed that our model achieved state-of-the-art performance on the suicide risk assessment dataset, demonstrating that the model has excellent performance and good practical value.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Multifeature Fusion Recurrent Attention Network</title>
        <p>The multifeature fusion recurrent attention method proposed in this paper consists of four parts. The framework of our model is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The first part of the model uses a long short-term memory network (LSTM) to obtain the text representation <italic>T</italic>, which has an attention weight <italic>α</italic> in the second part of the model-attention mechanism. The third part of the model is the feature extraction layer, which is used to capture features in the post that are difficult to be extracted by the neural network. The model then fuses the external feature vector with the attention vector to assess suicide risk.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Architecture of the multifeature fusion recurrent attention network. LSTM: long short-term memory; MLP: multilayer perceptron.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e28227_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>LSTM Network</title>
        <p>The LSTM network was proposed by Hochreiter et al [<xref ref-type="bibr" rid="ref12">12</xref>], which is a variant of the RNN. LSTM introduces a “gate layer” to control neurons to update information, increasing the ability to avoid long-distance dependency problems. LSTM further solves the gradient explosion and gradient disappearance of an RNN when training long text. Therefore, LSTM is the best choice for solving long text classification tasks. The algorithm process of LSTM is as follows:</p>
        <disp-formula><italic>f<sub>k</sub></italic>=<italic>σ</italic>(<italic>W<sup>f</sup>x<sub>k</sub></italic> + <italic>V<sup>f</sup>h<sub>k–1</sub></italic> + <italic>b<sup>f</sup></italic>) <bold>(1)</bold></disp-formula>
        <disp-formula><italic>i<sub>k</sub></italic>=<italic>σ</italic>(<italic>W<sup>i</sup>x<sub>k</sub></italic> + <italic>V<sup>i</sup>h<sub>k–1</sub></italic> + <italic>b<sup>i</sup></italic>) <bold>(2)</bold></disp-formula>
        <disp-formula><italic>o<sub>k</sub></italic>=<italic>σ</italic>(<italic>W<sup>o</sup>x<sub>k</sub></italic> + <italic>V<sup>o</sup>h<sub>k–1</sub></italic> + <italic>b<sup>o</sup></italic>) <bold>(3)</bold></disp-formula>
        <disp-formula><italic>c′<sub>k</sub></italic>=<italic>tanh</italic>(<italic>W<sup>c</sup>x<sub>k</sub></italic> + <italic>V<sup>c</sup>h<sub>k–1</sub></italic> + <italic>b<sup>c</sup></italic>) <bold>(4)</bold></disp-formula>
        <disp-formula><italic>c<sub>k</sub></italic>=<italic>f<sub>k</sub></italic> ⊙ <italic>c<sub>k–1</sub></italic> + <italic>i<sub>k</sub></italic> ⊙ <italic>c′<sub>k</sub></italic> <bold>(5)</bold></disp-formula>
        <disp-formula><italic>h<sub>k</sub>=o<sub>k</sub></italic> ⊙ <italic>tanh</italic> (<italic>c<sub>k</sub></italic>) <bold>(6)</bold></disp-formula>
        <p>where <italic>σ</italic> represents the sigmoid function and ⊙ represents the element-wise multiplication of two vectors. If an input sequence is <italic>X</italic>=[<italic>x<sub>1</sub>, x<sub>2</sub>, x<sub>3</sub>,…,x<sub>N</sub></italic>] for the input <italic>x<sub>k</sub></italic>(1≤<italic>k</italic>≤<italic>N</italic>) of each position, LSTM needs three steps to output the hidden state <italic>h<sub>k</sub></italic>. In the first step, the forget gate <italic>sigmoid</italic> function decides whether the memory cell <italic>c<sub>k</sub></italic> needs to forget information based on the hidden state <italic>h<sub>k–1</sub></italic> of the previous position and input <italic>x<sub>k</sub></italic>. The next step is to decide what information the memory cell needs to update, and this step can be divided into two parts. First, the input gate <italic>sigmoid</italic> function determines whether the memory cell needs to update information. Then, the <italic>tanh</italic> function will generate a new candidate value <italic>c′<sub>k</sub></italic>. The new state of the memory cell will be updated under the joint action of the forgetting gate and input gate. In the last step, the hidden state of this position is limited between –1 and 1 under the action of the <italic>tanh</italic> function, and the output gate sigmoid function decides whether the neuron needs to output.</p>
        <p>LSTM can obtain the information of the current position through the above steps, but the text that follows the current position is also essential. In the bidirectional LSTM (BiLSTM), the forward LSTM extracts information from the preceding text and the backward LSTM extracts information from the following text. The BiLSTM combines the forward hidden state and the backward hidden state at the same position to create a new hidden state, which can obtain more context information. The hidden state <italic>h<sub>k</sub></italic> of the BiLSTM is shown in Equation 9.</p>
        <disp-formula>
          <graphic xlink:href="medinform_v9i7e28227_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
      </sec>
      <sec>
        <title>Self-Attention Layer</title>
        <p>In a sentence, there are only a few words that can represent the semantic information of the entire sentence. If the model treats every word the same way, the learning ability of the model will be wasted, which will reduce the efficiency of the model. Therefore, we introduce the attention mechanism to this process. This adds an attention weight to each word in the text so that the model will pay more attention to words with higher weights. The attention mechanism has achieved excellent performance in natural language processing tasks owing to its advantages of fewer parameters, faster model training, and stronger interpretability [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>For the hidden state <inline-graphic xlink:href="medinform_v9i7e28227_fig4.png" xlink:type="simple" mimetype="image"/> from the BiLSTM, the calculation process to obtain the attention weight <inline-graphic xlink:href="medinform_v9i7e28227_fig5.png" xlink:type="simple" mimetype="image"/> is as follows:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v9i7e28227_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <inline-graphic xlink:href="medinform_v9i7e28227_fig7.png" xlink:type="simple" mimetype="image"/> are trainable parameters and <inline-graphic xlink:href="medinform_v9i7e28227_fig8.png" xlink:type="simple" mimetype="image"/> is the attention score of the input hidden state <inline-graphic xlink:href="medinform_v9i7e28227_fig4.png" xlink:type="simple" mimetype="image"/>. Normalization of <inline-graphic xlink:href="medinform_v9i7e28227_fig8.png" xlink:type="simple" mimetype="image"/> is the softmax function that can provide the attention weight of the input. The vector representation of the entire sentence can then be calculated by Equation 12:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v9i7e28227_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
      </sec>
      <sec>
        <title>Feature Extraction Layer</title>
        <p>The neural network focuses on the semantic information of the text, but there are other linguistic features in the text that can help to assess suicide risk. We set up three sets of linguistic features: n-gram features, lexicon-based features, and symbolic features.</p>
        <p>For n-gram features, we used bigram and trigram linguistic models as features, and we used term frequency-inverse document frequency (TF-IDF) weights to calculate the feature values. However, the feature matrix is very sparse, and therefore we used nonnegative matrix factorization [<xref ref-type="bibr" rid="ref13">13</xref>] to reduce the dimension to 50.</p>
        <p>For lexicon-based features, since a sentiment word represents the sentiment tendency of the entire text, we introduced the NRC [<xref ref-type="bibr" rid="ref14">14</xref>] dictionary to capture the posters’ emotions. We separately counted the number of emotional words representing positive emotions, negative emotions, sadness, anger, despair, and fear in a post, and the length of the post. We combined these statistics as a lexicon-based feature vector.</p>
        <p>For symbolic features, Stirman et al [<xref ref-type="bibr" rid="ref15">15</xref>] proposed that suicidal people are self-oriented and they frequently use first-person pronouns. Yang et al [<xref ref-type="bibr" rid="ref16">16</xref>] proposed that suicidal people frequently use rhetorical rhetoric to emphasize their emotions consciously. In social media posts, emojis are also used to express emotions. Therefore, we counted the number of first-person pronouns (eg, “I,” “me,” “mine,” “myself”), question marks, and emojis in posts as symbolic features.</p>
      </sec>
      <sec>
        <title>Classification Layer</title>
        <p>The classification layer used in this study consisted of two parts: a multilayer perceptron and softmax layer. The multilayer perceptron produces classification results and the classification probability is normalized by the softmax layer. We also used the distribution loss function to train the model. Owing to the small number of samples in the dataset, we introduced <italic>L<sub>2</sub></italic> regularization to reduce the overfitting problem of the model.</p>
        <disp-formula>
          <graphic xlink:href="medinform_v9i7e28227_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>N</italic> is the total number of training data, <italic>M</italic> is the number of categories, and <italic>q<sub>i</sub></italic> and <inline-graphic xlink:href="medinform_v9i7e28227_fig11.png" xlink:type="simple" mimetype="image"/> represent the classification result and classification probability, respectively. In Equation 14, <italic>y<sup>i</sup><sub>j</sub></italic> is the ground truth, <italic>λ</italic> is the coefficient of the <italic>L<sub>2</sub></italic> regularization term, and <italic>θ</italic> is a hyperparameter. In particular, we introduced the distribution weight γ in the loss function, which is a trainable parameter [<xref ref-type="bibr" rid="ref17">17</xref>]. Categories with more training data have smaller weights. The distribution loss function can reduce the impact of an uneven data distribution.</p>
      </sec>
      <sec>
        <title>Experimental Settings</title>
        <p>Before the experiment, we set the initial parameters based on previous modeling experience. We tuned the model parameters on the development set and achieved the best results. We used Adam to optimize the model. The parameters of the optimal model are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Hyperparameter settings.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Hyperparameters</td>
                <td>Optimal value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Word embedding dimension</td>
                <td>300</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM<sup>a</sup> hidden units</td>
                <td>200</td>
              </tr>
              <tr valign="top">
                <td>Learning rate</td>
                <td>0.2</td>
              </tr>
              <tr valign="top">
                <td>Dropout rate</td>
                <td>0.5</td>
              </tr>
              <tr valign="top">
                <td>L<sub>2</sub> regularization weight</td>
                <td>10<sup>–5</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>BiLSTM: bidirectional long short-term memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Dataset</title>
        <p>The suicide risk assessment dataset was released by the Computational Linguistics and Clinical Psychology (CLPsych) 2019 shared task. The goal of CLPsych 2019 was to assess users’ suicide risk based on their posts. The dataset constructed by Shing et al [<xref ref-type="bibr" rid="ref10">10</xref>] in 2018 consists of posts published on the Reddit social media platform between 2005 and 2015. To protect users’ privacy, their personal information was replaced by a user ID.</p>
        <p>This paper is based on CLPsych-2019 task A (“From Keyboard to Clinic”). Texts used in our dataset were all derived from posts with varying degrees of suicide risk on the SuicideWatch subreddit. The CLPsych dataset was broken down to include 57,016 posts in the training set and 9611 posts in the test set, all from the SuicideWatch subreddit. Among them, the proportion of samples in each category was close to 1:1:1:1. The shortest sentence contained 14 words and the longest sentence contained 486 words. We defined the three following assessment methods to better assess the suicide risk and increase the practicality of the model: (1) suicide risk (risk), which has the same requirements as the CLPsych shared task, divided into four classes <italic>a, b, c, d</italic> from low to high; (2) suicide existence (existence), which is an indicator used to judge whether the poster has a suicidal intention so that the posts can be divided into two levels of exist versus not exist, with the latter indicating a shallow suicide risk (<italic>class a</italic>), and they are not likely to commit suicide in the near future; (3) suicide urgency (urgency), in which the post is divided into two levels of urgent versus not urgent according to the suicide risk, with the urgent level (classes <italic>c, d</italic>) indicating that the user needs psychological assistance urgently.</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the postassessment results obtained under the different suicide risk assessment methods.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Example posts from the SuicideWatch subreddit.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="650"/>
            <col width="70"/>
            <col width="140"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Post</td>
                <td>Risk</td>
                <td>Existence</td>
                <td>Urgency</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>A nihilist teetering on edge. Things were good before I came into being</td>
                <td>a</td>
                <td>Not exist</td>
                <td>Not urgent</td>
              </tr>
              <tr valign="top">
                <td>Has anyone attempted suicide and failed and then felt guilty for being incompetent?</td>
                <td>b</td>
                <td>Exist</td>
                <td>Not urgent</td>
              </tr>
              <tr valign="top">
                <td>Just sitting on a bench, waiting and thinking. I don’t want to, but it feels like the best option.</td>
                <td>c</td>
                <td>Exist</td>
                <td>Urgent</td>
              </tr>
              <tr valign="top">
                <td>Tell me how to commit suicide painlessly.</td>
                <td>d</td>
                <td>Exist</td>
                <td>Urgent</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <p>In the experiments, the performance of our model was evaluated by the macroaverage <italic>F<sub>1</sub></italic> score. The verification method was as follows:</p>
        <disp-formula><italic>P</italic>=<italic>TP</italic>/(<italic>TP</italic>+<italic>FP</italic>) <bold>(15)</bold></disp-formula>
        <disp-formula><italic>R</italic>=<italic>TP</italic>/(<italic>TP</italic>+<italic>FN</italic>) <bold>(16)</bold></disp-formula>
        <disp-formula><italic>F<sub>1</sub></italic>=2×<italic>P</italic>×<italic>R</italic>/(<italic>P</italic>+<italic>R</italic>) <bold>(17)</bold></disp-formula>
        <p>where <italic>P</italic> and <italic>R</italic> are precision and recall, respectively. TP, FN, and FP represent the true positive, false negative, and false positive predictions, respectively. The <italic>F<sub>1</sub></italic> score is a harmonic average of precision and recall.</p>
      </sec>
      <sec>
        <title>Comparison With Baseline</title>
        <p>To compare the performance of different models in the suicide assessment task, we tested different classification models on the training set. The experimental results are shown in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <p>The inputs of the above models are all 300-dimensional Glove word embedding vectors. As shown in <xref ref-type="table" rid="table3">Table 3</xref>, the performance of the deep learning–based models was better than that of the machine learning–based models. The results of the LSTM and BiLSTM were also better than those of the CNN. In particular, LSTM was better than CNN for long text processing, and the performance of BiLSTM was better than that of LSTM. This shows that BiLSTM can capture more contextual semantic information. The results of the ensemble models were significantly better than those of the single models. In addition, different models showed different capabilities of semantic information extraction, and the combination of different models can supplement the missing semantic information of a single model. The result of the BiLSTM+Attention model was better than that of the BiLSTM+CNN model. This assessment demonstrated that our introduced attention mechanism is more suitable for this task.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Experimental results of classification models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="170"/>
            <col width="170"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Models</td>
                <td>Risk-F<sub>1</sub></td>
                <td>Existence-F<sub>1</sub></td>
                <td>Urgency-F<sub>1</sub></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>SVM<sup>a</sup></td>
                <td>0.296</td>
                <td>0.793</td>
                <td>0.716</td>
              </tr>
              <tr valign="top">
                <td>CNN<sup>b</sup></td>
                <td>0.336</td>
                <td>0.834</td>
                <td>0.742</td>
              </tr>
              <tr valign="top">
                <td>LSTM<sup>c</sup></td>
                <td>0.397</td>
                <td>0.862</td>
                <td>0.766</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM<sup>d</sup></td>
                <td>0.404</td>
                <td>0.863</td>
                <td>0.774</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM+CNN</td>
                <td>0.423</td>
                <td>0.872</td>
                <td>0.789</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM+Attention (proposed model)</td>
                <td>0.448</td>
                <td>0.887</td>
                <td>0.796</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>CNN: convolutional neural network.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>LSTM: long short-term memory.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>BiLSTM: bidirectional long short-term memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Comparison of Different Input Features</title>
        <p>In addition to using the deep learning–based model, we also set up three sets of linguistic features: n-gram features, lexicon-based features, and symbolic features. To test the influence of different features on the suicide risk assessment task, we set up 6 sets of comparative experiments. We separately recorded the experimental results of a support vector machine (SVM) model. The experimental results are shown in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
        <p>The <italic>risk-F<sub>1</sub></italic> score using TF-IDF features was 0.257. The performance of the n-gram–based method was better than that of TF-IDF. The results of the trigram were better than those of the bigram. Using lexicon features had the most significant improvement on the results, whereas the symbolic features improved the performance to a lesser extent. Concatenating all feature vectors showed that using ensemble features was the best choice for our task, with a <italic>risk-F<sub>1</sub></italic> score of 0.284.</p>
        <p>We further compared the effects of embedding methods on the experimental results. The pretraining language model bidirectional encoder representations from transformers (BERT) can also be used for classification tasks alone. We compared the pretraining language model BERT with the BiLSTM and BiLSTM+Attention models, which showed excellent performance on our task. We used word2vec word embedding [<xref ref-type="bibr" rid="ref18">18</xref>], Glove word embedding [<xref ref-type="bibr" rid="ref19">19</xref>], and BERT embedding as the input of the model. The experimental results are shown in <xref ref-type="table" rid="table5">Table 5</xref>.</p>
        <p>The result improved slightly after adding LSTM. Using the pretrained language model BERT resulted in better performance than using the word embedding model. We also concatenated ensemble features at the classification layer, which further improved the performance of the model.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Experimental results of different features for support vector machine models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="170"/>
            <col width="170"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Input</td>
                <td>Risk-F<sub>1</sub></td>
                <td>Existence-F<sub>1</sub></td>
                <td>Urgency-F<sub>1</sub></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>TF-IDF<sup>a</sup></td>
                <td>0.257</td>
                <td>0.783</td>
                <td>0.691</td>
              </tr>
              <tr valign="top">
                <td>Bigram+TF-IDF</td>
                <td>0.271</td>
                <td>0.802</td>
                <td>0.712</td>
              </tr>
              <tr valign="top">
                <td>Trigram+TF-IDF</td>
                <td>0.276</td>
                <td>0.798</td>
                <td>0.709</td>
              </tr>
              <tr valign="top">
                <td>Lexicon+TF-IDF</td>
                <td>0.282</td>
                <td>0.826</td>
                <td>0.721</td>
              </tr>
              <tr valign="top">
                <td>Symbolic+TF-IDF</td>
                <td>0.254</td>
                <td>0.784</td>
                <td>0.684</td>
              </tr>
              <tr valign="top">
                <td>n-gram+lexicon+symbolic+TF-IDF</td>
                <td>0.284</td>
                <td>0.835</td>
                <td>0.724</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>TF-IDF: term frequency-inverse document frequency.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Experimental results of deep learning–based models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="0"/>
            <col width="170"/>
            <col width="0"/>
            <col width="170"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Models and input</td>
                <td colspan="2">Risk-F<sub>1</sub></td>
                <td>Existence-F<sub>1</sub></td>
                <td>Urgency-F<sub>1</sub></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">BERT<sup>a</sup></td>
                <td colspan="2">0.467</td>
                <td>0.889</td>
                <td>0.861</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>BiLSTM<sup>b</sup></bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec</td>
                <td colspan="2">0.404</td>
                <td colspan="2">0.863</td>
                <td>0.774</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Glove</td>
                <td colspan="2">0.412</td>
                <td colspan="2">0.861</td>
                <td>0.793</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT</td>
                <td colspan="2">0.474</td>
                <td colspan="2">0.914</td>
                <td>0.857</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT+Features</td>
                <td colspan="2">0.481</td>
                <td colspan="2">0.923</td>
                <td>0.863</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>BiLSTM+Attention</bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec</td>
                <td colspan="2">0.448</td>
                <td colspan="2">0.887</td>
                <td>0.796</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Glove</td>
                <td colspan="2">0.456</td>
                <td colspan="2">0.891</td>
                <td>0.787</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT</td>
                <td colspan="2">0.507</td>
                <td colspan="2">0.915</td>
                <td>0.863</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT+Features</td>
                <td colspan="2">0.514</td>
                <td colspan="2">0.931</td>
                <td>0.876</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>BERT: bidirectional encoder representations from transformers.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>BiLSTM: bidirectional long short-term memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Comparison With Other Existing Models</title>
        <p>We compared our model with the methods of other teams in the CLPsych 2019 shared task, demonstrating that our model achieved the best results. The risk-<italic>F<sub>1</sub></italic>, urgency-<italic>F<sub>1</sub></italic>, and existence-<italic>F<sub>1</sub></italic> all reached the highest levels with our proposed model (<xref ref-type="table" rid="table6">Table 6</xref>).</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Experimental results of existing methods.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="170"/>
            <col width="170"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Models</td>
                <td>Risk-<italic>F</italic><sub>1</sub></td>
                <td>Existence-<italic>F</italic><sub>1</sub></td>
                <td>Urgency-<italic>F</italic><sub>1</sub></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Mohammadi et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td>
                <td>0.481</td>
                <td>0.922</td>
                <td>0.776</td>
              </tr>
              <tr valign="top">
                <td>Matero et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td>
                <td>0.459</td>
                <td>0.842</td>
                <td>0.839</td>
              </tr>
              <tr valign="top">
                <td>Bitew et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td>
                <td>0.445</td>
                <td>0.852</td>
                <td>0.789</td>
              </tr>
              <tr valign="top">
                <td>Iserman et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td>
                <td>0.402</td>
                <td>0.902</td>
                <td>0.844</td>
              </tr>
              <tr valign="top">
                <td>Allen et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td>
                <td>0.373</td>
                <td>0.876</td>
                <td>0.773</td>
              </tr>
              <tr valign="top">
                <td>González Hevia et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td>
                <td>0.312</td>
                <td>0.897</td>
                <td>0.821</td>
              </tr>
              <tr valign="top">
                <td>Multifeature fusion recurrent attention (this study)</td>
                <td>0.514 (+0.033)</td>
                <td>0.931 (+0.009)</td>
                <td>0.876 (+0.037)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Mohammadi et al [<xref ref-type="bibr" rid="ref11">11</xref>] proposed an ensemble method including 8 neural submodels to extract neural features. They then used the SVM classifier to classify the neural feature vector. They achieved a risk-<italic>F<sub>1</sub></italic> score of 0.481 and an existence-<italic>F<sub>1</sub></italic> score of 0.922 (the highest result in CLPsych 2019). González Hevia et al [<xref ref-type="bibr" rid="ref24">24</xref>] also proposed an ensemble method combined with the result of the SVM classifier and a pretrained RNN. Matero et al [<xref ref-type="bibr" rid="ref20">20</xref>] proposed multilevel dual-context language and BERT using the deep attention model to extract dual-context information. Their model was also fused with linguistic features and achieved the highest urgency-<italic>F<sub>1</sub></italic> score of 0.839. Bitew et al [<xref ref-type="bibr" rid="ref21">21</xref>] proposed a machine learning–based method, and integrated the logistic regression classifier and the linear SVM classifier. Iserman et al [<xref ref-type="bibr" rid="ref22">22</xref>] proposed a simple recursive partitioning model with lexicon features. Similarly, Allen et al [<xref ref-type="bibr" rid="ref23">23</xref>] used CNN and Linguistic Inquiry and Word Count [<xref ref-type="bibr" rid="ref25">25</xref>] features to assess suicide risk.</p>
      </sec>
      <sec>
        <title>Attention Visualization and Error Analysis</title>
        <p>To analyze the effectiveness of the attention mechanism, we extracted the attention weight of the self-attention layer and visualized it with text. The attention visualization results are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>; a deeper color indicates a larger attention weight for the word.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Examples of attention visualization.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e28227_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Among the four posts shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, the first two posts are classified into the right class by the model, whereas the last two posts are classified into the wrong category. As shown in the first post, “kill” has the largest weight, which is the core word of this post, and the model also pays attention to “knew” and “do it now.” The model then classified this post into <italic>class d</italic> (high suicide risk). In the second post, the model focused on “tired of trying” and “can’t keep going.” This shows that the model pays attention to words that represent the emotion of the poster. This post lacks the terms associated with high suicide risk, and therefore the model classified this post into <italic>class c</italic>.</p>
        <p>In the third post (<italic>class b</italic>)<italic>,</italic> the model focused on the terms “how” and “their last words.” However, the model did not learn that the subject of “last words” was “they” instead of the poster, and therefore mistakenly classified the post into <italic>class d</italic>. In the fourth post (<italic>class a</italic>), the model focused on “having,” “feeling,” and “for a year,” and mistakenly believed that this post reflects a high suicide risk. This is because we found that “feeling” is often associated with words that express negative emotions in the training set. Therefore, we believe that the accuracy can be improved by fusing external features.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The results of n-gram features based on TF-IDF weights were better than those obtained using TF-IDF features, which cannot capture the word order information in the text. However, the results of trigram features were inferior to those of bigram features. This shows that although n-gram features can capture the word order information, if multiple features are extracted, the feature vectors will be sparse and reduce the performance of the model. In the experiment, using dictionary features improved the model’s performance significantly. This demonstrates that the emotional tendency of a text can be represented by the limited number of emotional words in the text. The use of symbolic features showed only minor improvements on performance, indicating that punctuation in the text can also express part of the semantic information.</p>
        <p>Our model uses the BERT pretraining model as input. The pretrained word vectors represent the semantic information of words, making up for the information missing from word embedding models.</p>
        <p>The experimental results further showed that BiLSTM performs well in extended text classification. BiLSTM can capture the semantic information of the context in the text and solve long-distance dependence in text processing. After adding the attention mechanism, the performance of the model was further improved. This shows that the attention mechanism can effectively make the model pay attention to the core semantic features of a text.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This paper proposes a multifeature fusion recurrent attention network to assess the suicide risk of SuicideWatch subreddit posts. Our model uses the BERT pretrained language model as input, which can create a more precise text representation than the word embedding model. The BiLSTM in the model can capture long-distance dependence and dual-content information. The self-attention mechanism can make the model focus on the core information of the post. The model achieved the best performance on the experimental dataset. Moreover, we introduced n-gram features, lexicon features, and symbolic features, which make up the missing information in the feature extraction of the recurrent attention network, thereby improving the accuracy of the model.</p>
        <p>In our future work, we will introduce the personality characteristics of the posters and other social media attributes of the posters for further improving suicide risk assessment.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BiLSTM</term>
          <def>
            <p>bidirectional long short-term memory network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CLPsych</term>
          <def>
            <p>Computational Linguistics and Clinical Psychology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LSTM</term>
          <def>
            <p>long short-term memory network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">TF-IDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The work is supported by grants from the National Natural Science Foundation of China (62072070).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>JL designed the algorithm and experiments and wrote the paper. YZ provided theoretical guidance and the revision of this paper. SZ, YZ, HL, and JW contributed to the algorithm design. All authors read and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>National Suicide Prevention Strategies: Progress, Examples and Indicators</article-title>
          <source>World Health Organization</source>
          <year>2018</year>
          <access-date>2021-06-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.who.int/iris/handle/10665/279765">https://apps.who.int/iris/handle/10665/279765</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Creating a Chinese suicide dictionary for identifying suicide risk on social media</article-title>
          <source>PeerJ</source>
          <year>2015</year>
          <volume>3</volume>
          <fpage>e1455</fpage>
          <pub-id pub-id-type="doi">10.7717/peerj.1455</pub-id>
          <pub-id pub-id-type="medline">26713232</pub-id>
          <pub-id pub-id-type="pii">1455</pub-id>
          <pub-id pub-id-type="pmcid">PMC4690390</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Detecting suicidal ideation in Chinese microblogs with psychological lexicons</article-title>
          <year>2014</year>
          <conf-name>2014 IEEE 11th International Conference on Ubiquitous Intelligence and Computing and 2014 IEEE International Conference on Autonomic and Trusted Computing and 2014 IEEE International Conference on Scalable Computing and Communications and Its Associated Workshops (UIC-ATC-ScalCom)</conf-name>
          <conf-date>December 9-12, 2014</conf-date>
          <conf-loc>Bali, Indonesia</conf-loc>
          <fpage>844</fpage>
          <lpage>849</lpage>
          <pub-id pub-id-type="doi">10.1109/uic-atc-scalcom.2014.48</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Using linguistic features to estimate suicide probability of Chinese microblog users</article-title>
          <year>2014</year>
          <month>11</month>
          <day>27</day>
          <conf-name>International Conference on Human Centered Computing</conf-name>
          <conf-date>2014</conf-date>
          <conf-loc>Phnom Penh, Cambodia</conf-loc>
          <fpage>549</fpage>
          <lpage>559</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-15554-8_45</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Detecting changes in suicide content manifested in social media following celebrity suicides</article-title>
          <source>HT ACM Conf Hypertext Soc Media</source>
          <year>2015</year>
          <month>09</month>
          <volume>2015</volume>
          <fpage>85</fpage>
          <lpage>94</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28713876"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2700171.2791026</pub-id>
          <pub-id pub-id-type="medline">28713876</pub-id>
          <pub-id pub-id-type="pmcid">PMC5507358</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>MI</given-names>
            </name>
          </person-group>
          <article-title>Latent Dirichlet allocation</article-title>
          <source>J Machine Learn Res</source>
          <year>2003</year>
          <month>3</month>
          <day>4</day>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
          <pub-id pub-id-type="doi">10.1162/jmlr.2003.3.4-5.993</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kiciman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Discovering shifts to suicidal ideation from mental health content in social media</article-title>
          <source>Proc SIGCHI Conf Hum Factor Comput Syst</source>
          <year>2016</year>
          <month>05</month>
          <volume>2016</volume>
          <fpage>2098</fpage>
          <lpage>2110</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29082385"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2858036.2858207</pub-id>
          <pub-id pub-id-type="medline">29082385</pub-id>
          <pub-id pub-id-type="pmcid">PMC5659860</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bittar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dutta</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Text classification to inform suicide risk assessment in electronic health records</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2019</year>
          <month>08</month>
          <day>21</day>
          <volume>264</volume>
          <fpage>40</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI190179</pub-id>
          <pub-id pub-id-type="medline">31437881</pub-id>
          <pub-id pub-id-type="pii">SHTI190179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Long</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Detecting suicidal ideation with data protection in online communities</article-title>
          <year>2019</year>
          <conf-name>International Conference on Database Systems for Advanced Applications</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Chiang Mai, Thailand</conf-loc>
          <fpage>225</fpage>
          <lpage>229</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-18590-9_17</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shing</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zirikly</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Friedenberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Daumé III</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Resnik</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Expert, crowdsourced, and machine assessment of suicide risk via online postings</article-title>
          <year>2018</year>
          <conf-name>Fifth Workshop on Computational Linguistics and Clinical Psychology: From Keyboard to Clinic</conf-name>
          <conf-date>June 2018</conf-date>
          <conf-loc>New Orleans</conf-loc>
          <fpage>25</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w18-0603</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mohammadi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Amini</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kosseim</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>ClaC at CLPsych 2019: Fusion of Neural Features and Predicted Class Probabilities for Suicide Risk Assessment Based on Online Posts</article-title>
          <year>2019</year>
          <conf-name>Proceedings of the Sixth Workshop on Computational Linguistics and Clinical Psychology</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>34</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/W19-3004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>1780</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Févotte</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Idier</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Algorithms for nonnegative matrix factorization with the β-divergence</article-title>
          <source>Neural Comput</source>
          <year>2011</year>
          <month>09</month>
          <volume>23</volume>
          <issue>9</issue>
          <fpage>2421</fpage>
          <lpage>2456</lpage>
          <pub-id pub-id-type="doi">10.1162/neco_a_00168</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mohammad</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Word Affect Intensities</article-title>
          <year>2018</year>
          <conf-name>Eleventh International Conference on Language Resources and Evaluation</conf-name>
          <conf-date>May 2018</conf-date>
          <conf-loc>Miyazaki, Japan</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stirman</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Pennebaker</surname>
              <given-names>JW</given-names>
            </name>
          </person-group>
          <article-title>Word use in the poetry of suicidal and nonsuicidal poets</article-title>
          <source>Psychosom Med</source>
          <year>2001</year>
          <volume>63</volume>
          <issue>4</issue>
          <fpage>517</fpage>
          <lpage>522</lpage>
          <pub-id pub-id-type="doi">10.1097/00006842-200107000-00001</pub-id>
          <pub-id pub-id-type="medline">11485104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>TI-CNN: Convolutional neural networks for fake news detection</article-title>
          <source>arXiv</source>
          <access-date>2018-06-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1806.00749">https://arxiv.org/abs/1806.00749</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Girshick</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Focal loss for dense object detection</article-title>
          <source>IEEE Trans Pattern Anal Mach Intell</source>
          <year>2017</year>
          <volume>42</volume>
          <issue>2</issue>
          <fpage>318</fpage>
          <lpage>327</lpage>
          <pub-id pub-id-type="doi">10.1109/TPAMI.2018.2858826</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <year>2013</year>
          <conf-name>1st International Conference on Learning Representations, ICLR 2013</conf-name>
          <conf-date>May 2-4, 2013</conf-date>
          <conf-loc>Scottsdale, AZ</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>GloVe: Global Vectors for Word Representation</article-title>
          <year>2014</year>
          <conf-name>2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>October 25-29, 2014</conf-date>
          <conf-loc>Doha</conf-loc>
          <fpage>1532</fpage>
          <lpage>1543</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Matero</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Idnani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Son</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Giorgi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zamani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Limbachyia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Guntuku</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
          </person-group>
          <article-title>Suicide risk assessment with multi-level dual-context language and BERT</article-title>
          <year>2019</year>
          <conf-name>Sixth Workshop on Computational Linguistics and Clinical Psychology</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>39</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w19-3005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bitew</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Bekoulis</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Deleu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sterckx</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zaporojets</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Demeester</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Develder</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Predicting suicide risk from online postings in Reddit: the UGent-IDLab submission to the CLPsych 2019 shared task A</article-title>
          <year>2019</year>
          <conf-name>Sixth Workshop on Computational Linguistics and Clinical Psychology</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>158</fpage>
          <lpage>161</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w19-3019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Iserman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nalabandian</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ireland</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Dictionaries and decision trees for the 2019 CLPsych Shared Task</article-title>
          <year>2019</year>
          <conf-name>Sixth Workshop on Computational Linguistics and Clinical Psychology</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>188</fpage>
          <lpage>194</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w19-3025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bagroy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krishnamurti</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>ConvSent at CLPsych 2019 Task A: using post-level sentiment features for suicide risk prediction on Reddit</article-title>
          <year>2019</year>
          <conf-name>Sixth Workshop on Computational Linguistics and Clinical Psychology</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>182</fpage>
          <lpage>187</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>González Hevia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cerezo Menéndez</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gayo-Avello</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Analyzing the use of existing systems for the CLPsych 2019 Shared Task</article-title>
          <year>2019</year>
          <conf-name>Sixth Workshop on Computational Linguistics and Clinical Psychology</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>148</fpage>
          <lpage>151</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w19-3017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennebaker</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Blackburn</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <source>The development and psychometric properties of LIWC2015</source>
          <year>2015</year>
          <month>09</month>
          <day>15</day>
          <access-date>2015-09-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://repositories.lib.utexas.edu/handle/2152/31333">https://repositories.lib.utexas.edu/handle/2152/31333</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
