<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i7e17784</article-id>
      <article-id pub-id-type="pmid">32729840</article-id>
      <article-id pub-id-type="doi">10.2196/17784</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Identifying and Predicting Intentional Self-Harm in Electronic Health Record Clinical Notes: Deep Learning Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Bian</surname>
            <given-names>Jiang</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Li</surname>
            <given-names>Fang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shams</surname>
            <given-names>Shayan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Obeid</surname>
            <given-names>Jihad S</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Medical University of South Carolina</institution>
            <addr-line>135 Cannon St. Suite 405 MSC200</addr-line>
            <addr-line>Charleston, SC, 29425</addr-line>
            <country>United States</country>
            <phone>1 8437920272</phone>
            <email>jobeid@musc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7193-7779</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Dahne</surname>
            <given-names>Jennifer</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7297-9420</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Christensen</surname>
            <given-names>Sean</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0702-1862</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Howard</surname>
            <given-names>Samuel</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1338-1300</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Crawford</surname>
            <given-names>Tami</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0720-7701</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Frey</surname>
            <given-names>Lewis J</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5388-280X</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Stecker</surname>
            <given-names>Tracy</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1464-5195</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Bunnell</surname>
            <given-names>Brian E</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4964-0688</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Medical University of South Carolina</institution>
        <addr-line>Charleston, SC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>University of South Florida</institution>
        <addr-line>Tampa, FL</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jihad S Obeid <email>jobeid@musc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>30</day>
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>7</issue>
      <elocation-id>e17784</elocation-id>
      <history>
        <date date-type="received">
          <day>13</day>
          <month>1</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>5</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>25</day>
          <month>4</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>21</day>
          <month>5</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Jihad S Obeid, Jennifer Dahne, Sean Christensen, Samuel Howard, Tami Crawford, Lewis J Frey, Tracy Stecker, Brian E Bunnell. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 30.07.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2020/7/e17784" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Suicide is an important public health concern in the United States and around the world. There has been significant work examining machine learning approaches to identify and predict intentional self-harm and suicide using existing data sets. With recent advances in computing, deep learning applications in health care are gaining momentum.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to leverage the information in clinical notes using deep neural networks (DNNs) to (1) improve the identification of patients treated for intentional self-harm and (2) predict future self-harm events.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We extracted clinical text notes from electronic health records (EHRs) of 835 patients with International Classification of Diseases (ICD) codes for intentional self-harm and 1670 matched controls who never had any intentional self-harm ICD codes. The data were divided into training and holdout test sets. We tested a number of algorithms on clinical notes associated with the intentional self-harm codes using the training set, including several traditional bag-of-words–based models and 2 DNN models: a convolutional neural network (CNN) and a long short-term memory model. We also evaluated the predictive performance of the DNNs on a subset of patients who had clinical notes 1 to 6 months before the first intentional self-harm event. Finally, we evaluated the impact of a pretrained model using Word2vec (W2V) on performance.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The area under the receiver operating characteristic curve (AUC) for the CNN on the phenotyping task, that is, the detection of intentional self-harm in clinical notes concurrent with the events, was 0.999, with an F1 score of 0.985. In the predictive task, the CNN achieved the highest performance with an AUC of 0.882 and an F1 score of 0.769. Although pretraining with W2V shortened the DNN training time, it did not improve performance.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The strong performance on the first task, namely, phenotyping based on clinical notes, suggests that such models could be used effectively for surveillance of intentional self-harm in clinical text in an EHR. The modest performance on the predictive task notwithstanding, the results using DNN models on clinical text alone are competitive with other reports in the literature using risk factors from structured EHR data.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>machine learning</kwd>
        <kwd>deep learning</kwd>
        <kwd>suicide</kwd>
        <kwd>suicide, attempted</kwd>
        <kwd>electronic health records</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background and Significance</title>
        <p>Suicide ranks among the leading causes of death in the United States. On average, over 100 individuals die of suicide each day, resulting in combined medical and work loss costs totaling approximately US $80 billion annually [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Numerous risk factors for suicide have been identified and thoroughly researched. For example, suicide is more common in males, American Indian and Alaska Natives, and non-Hispanics and individuals with mental illness (eg, depression, anxiety, substance abuse), previous trauma, communication difficulties, decision-making impulsivity, and aggression [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Individuals who have previously engaged in intentional self-harm behaviors or suicide attempts are also at increased risk [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Despite extensive research on various risk factors, prospective suicide prediction remains difficult, as conventionally studied risk factors predict suicide attempts only 26% of the time [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>Currently established guidelines for suicide risk assessment include clinical interviews and questionnaires administered by qualified health care providers [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. However, research suggests that these approaches exhibit suboptimal performance in predicting future intentional self-harm behavior or suicide [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Less than a third of patients who engage in intentional self-harm and attempt suicide disclose thoughts about doing so [<xref ref-type="bibr" rid="ref12">12</xref>]. As such, current methods for identification of at-risk patients can be difficult and time-consuming. A great deal of recent research has focused on addressing these limitations using advanced analytical tools such as natural language processing (NLP) and machine learning [<xref ref-type="bibr" rid="ref13">13</xref>]. Studies using NLP approaches have largely used electronic health record (EHR)-based [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>] and NLP- and linguistics-driven prediction models [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. Studies using machine learning to predict suicidal and intentional self-harm behaviors from EHR data for patients admitted to hospitals or emergency departments have demonstrated variable accuracy (eg, 65%-95%) [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        <p>Clinical text classification using a deep convolutional network has been useful in the identification of specific phenotypes within the EHR for patients with a given set of clinical signs and symptoms [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. There have been significant advances in recent years in deep learning approaches, such as convolutional neural networks (CNNs), for a variety of applications including text processing and classification, computer vision, and speech recognition [<xref ref-type="bibr" rid="ref27">27</xref>]. In the area of text processing, there has been significant research in language models that are pretrained and then used to aid in automated text understanding of unlabeled data [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. These resulting learned word vectors could, in turn, be used for clinical text classification tasks [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. Pretraining models using these methods provide syntactic and semantic word similarities expressed in a multidimensional vector space with the potential for improving classifications based on neural networks and reducing computational cost [<xref ref-type="bibr" rid="ref28">28</xref>]. The use of advanced analytical approaches such as deep learning can extend this work and provide distinct advantages in predicting future intentional self-harm, suicide attempts, and suicide.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>Deep learning approaches have been used to address topics related to suicide using publicly available data sets. For example, Shing et al [<xref ref-type="bibr" rid="ref31">31</xref>] compared different machine learning methods including support vector machines (SVM) and a CNN-based model for the assessment of suicide risk based on web-based postings. Although they demonstrated the utility of deep learning, for this specific use case, the SVM model outperformed the CNN model. Conversely, Du et al [<xref ref-type="bibr" rid="ref32">32</xref>] demonstrated the superiority of a deep learning model over traditional models, including an SVM, in identifying suicide-related tweets in social media data. Despite these examples, there have been no reports in the literature on the utility of deep learning approaches for the identification of suicide-related clinical records (eg, for surveillance purposes) or for the prediction of suicidal behavior using clinical text from an EHR. Improving the recall and precision of phenotyping and predictive algorithms, particularly through deep learning analytic techniques, could lead to better follow-up and care by clinicians for patients who are at risk for intentional self-harm, suicide attempts, suicide, or any combination thereof. In this study, we explored a deep learning approach for (1) the automated detection of intentional self-harm events in clinical text concurrent with International Classification of Diseases (ICD) codes for intentional self-harm, that is, phenotyping, and (2) the prediction of future suicide attempts or intentional self-harm based on ICD-labeled encounters within the EHR.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Software Used</title>
        <p>We used R version 3.6.1 (R Foundation for Statistical Computing) [<xref ref-type="bibr" rid="ref33">33</xref>] for processing the data and clinical text and constructing the machine learning pipelines and Keras and TensorFlow v1.13 (Google’s open-source deep neural network framework) for the deep learning models.</p>
      </sec>
      <sec>
        <title>Patient Population</title>
        <p>This study was approved by the institutional review board (IRB) for human research at the Medical University of South Carolina (MUSC) under protocol number Pro00087416. Clinical notes were extracted from the Epic (Epic Systems Corporation) EHR system [<xref ref-type="bibr" rid="ref34">34</xref>] using the MUSC research data warehouse (RDW), which serves as an EHR data repository for research projects. Researchers may request data from the RDW with appropriate IRB approval and data governance oversight [<xref ref-type="bibr" rid="ref35">35</xref>]. We extracted clinical text notes for adult patients aged 20 to 90 years with ICD codes for suicide attempts or intentional self-harm as defined in the National Health Statistics Report (NHSR) from the Centers for Disease Control and Prevention (CDC) in the United States [<xref ref-type="bibr" rid="ref36">36</xref>]. The NHSR specifically included codes for self-harm events that were intentional (eg, T42.4X2; poisoning by benzodiazepines, intentional self-harm) and did not include codes for self-harm events that were unintentional (eg, T42.4X1; poisoning by benzodiazepines, accidental). For each patient in the study group, we selected the first intentional self-harm recorded in the chart during the study period (ie, 2012-2019). We filtered the notes within a 24-hour period of the intentional self-harm time stamp. We also extracted clinical text notes for control cases who never had any intentional self-harm ICD codes within our EHR spanning the years 2012 to 2019. The controls were selected randomly from the RDW after matching by age, gender, race, and ethnicity. During the processing of the clinical notes, we matched the controls to the study cases based on the proportion of note types in their records (eg, percent of progress notes) and word length of notes. The matching was performed using the nearest neighbor method in the MatchIt package in R [<xref ref-type="bibr" rid="ref37">37</xref>]. The resulting patient population included 835 intentional self-harm cases and 1670 controls.</p>
      </sec>
      <sec>
        <title>Clinical Notes</title>
        <sec>
          <title>Notes Concurrent With Intentional Self-Harm</title>
          <p>In the first part of this study, we sought to automate the detection of concurrent intentional self-harm ICD code assignment based on clinical text. The notes included a variety of different note types; however, the majority consisted of progress notes, plan of care notes, emergency department (ED) provider notes, history and physical (H&#38;P) notes, and consult notes. A full list of note types and their relative frequencies is provided in a table in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Individual notes longer than 800 words (less than one-third of all notes) were truncated at 800. We chose this cutoff to include as many notes per patient as possible. Notes belonging to the same patient were then concatenated into a single string arranged temporally, yielding 1 record per patient. Concatenated strings longer than 8000 words (44/2505, 1.76% of patients) were truncated at 8000. This allowed us to maintain the generated token vectors within a reasonable range for computational performance. The patients were divided into a training and cross-validation set (2012-2017) with 661 intentional self-harm cases and 1502 controls and a holdout test set (2018-2019) with 174 intentional self-harm cases and 168 controls.</p>
        </sec>
        <sec>
          <title>Prediction From Previous Clinical Notes</title>
          <p>In this part of the study, we sought to predict the future occurrence of intentional self-harm events based on previous clinical notes within the EHR. Clinical text was collected from a predictive window for a period between 180 and 30 days before the index event (ie, the first reported intentional self-harm event on record) for each patient. Patients who did not have clinical notes during that time window were excluded. Clinical notes were used from the first date within that time window up to 90 days following the first date or up to 30 days before the intentional self-harm event (whichever came first). That is, the largest possible predictive window included clinical notes from a time interval of up to 90 days. The same time window was used for the control group; however, the latest visit on record within the study period was used as the index visit instead of an intentional self-harm event. To reduce noise and excessive amounts of notes in this part of the study, we limited notes to the following note types: progress notes, ED provider notes, H&#38;P notes, consult notes, and discharge summaries. Individual notes were truncated to 1500 words and concatenated texts to 10,000-word cutoffs to capture a wider set of clinical texts. For the prediction part of the study, the patients were divided into a training and cross-validation set (2012-2017) with 480 intentional self-harm cases and 645 controls and a holdout test set (2018-2019) with 106 intentional self-harm cases and 106 controls.</p>
        </sec>
      </sec>
      <sec>
        <title>Labeling the Test Set</title>
        <p>A sample of 200 records from the test set (2018-2019) was manually reviewed to provide gold standard labels for a comparison with ICD code labels (based on the NHSR from the CDC). Each record reflected clinical notes in the EHR from concurrent visits of patients. We selected a random 100 from the study group (with intentional self-harm ICDs) and 100 controls. The concatenated strings from concurrent notes for this sample were imported into REDCap (Research Electronic Data Capture) [<xref ref-type="bibr" rid="ref38">38</xref>] and made available for review and labeling by the reviewers on our research team, which included 3 clinical psychologists, a psychiatry resident, a medical student, and a pediatrician. The reviewers were instructed to label the notes as intentional self-harm if there was a suicide attempt or intentional self-harm noted in any of the clinical notes associated with the concurrent visit. Suicidal ideation alone was not considered intentional self-harm. A subsample of 100 notes was labeled independently by 2 labelers to estimate the interrater reliability.</p>
      </sec>
      <sec>
        <title>Text Processing</title>
        <p>We tested several machine learning algorithms using the training data, including both deep learning–based classifiers using word embeddings (WEs) and the traditional bag-of-words (BOW)–based models. We performed the necessary preprocessing of the text for both types. We used the quanteda R package [<xref ref-type="bibr" rid="ref39">39</xref>] and regular expression functions within R for the text-processing pipeline. For the traditional BOW models, text processing included lower casing; removal of punctuation, stop words, and numbers; word stemming; and tokenization. For the WE models, text processing included lower casing, sentence segmentation, removal of punctuation, replacement of large numbers and dates with tokens using regular expressions, and tokenization.</p>
      </sec>
      <sec>
        <title>Word Frequencies</title>
        <p>Before running the machine learning algorithms, we examined differences in word frequencies across clinical notes concurrent with intentional self-harm events and notes preceding intentional self-harm events by over 30 days as compared with clinical notes from the control population. We performed a chi-square analysis to assess keywords that are overrepresented across the corpora of text [<xref ref-type="bibr" rid="ref40">40</xref>].</p>
      </sec>
      <sec>
        <title>Bag-of-Words–Based Classifiers</title>
        <p>For the BOW models, word frequencies were used as features and were normalized using term frequency–inverse document frequency [<xref ref-type="bibr" rid="ref41">41</xref>]. The traditional text classification models included naïve Bayes [<xref ref-type="bibr" rid="ref42">42</xref>]; decision tree classifier [<xref ref-type="bibr" rid="ref43">43</xref>] with a maximum depth of 20; random forest (RF) [<xref ref-type="bibr" rid="ref44">44</xref>] with 201 trees and the number of variables randomly sampled as candidates at each split (mtry=150); SVM [<xref ref-type="bibr" rid="ref45">45</xref>] type 1 with a radial basis kernel [<xref ref-type="bibr" rid="ref46">46</xref>]; and a simple multilayer perceptron (MLP) artificial neural network with a 64-node input layer, a 64-node hidden layer, and a single output node. We used the rectified linear unit (ReLU) activation function in both the input and hidden layers and sigmoid activation for the binary output node. The MLP was trained using a learning rate of 1×10<sup>−4</sup>, a batch size of 32, and a 20% validation split over 30 epochs.</p>
      </sec>
      <sec>
        <title>Word Embeddings</title>
        <p>We used Keras [<xref ref-type="bibr" rid="ref47">47</xref>] and TensorFlow version 1.13 [<xref ref-type="bibr" rid="ref48">48</xref>] for constructing and training the deep learning models. In preparation for WE, the text strings were converted to token sequences. To construct the features for the deep learning models, the sequences were prepadded with zeros to match the length of the longest string in the training set. We used Word2vec (W2V) to generate a pretrained model [<xref ref-type="bibr" rid="ref28">28</xref>]. The W2V weights were derived by pretraining a W2V skip-gram model on a sample of over 800,000 clinical notes from our EHR data set using 200 dimensions per word, a skip window size of 5 words in each direction, and negative sampling of 5. To explore and visualize the outcome of the pretrained W2V model, we used the t-distributed stochastic neighbor embedding (t-SNE) to map the multidimensional word vectors into a 2D space [<xref ref-type="bibr" rid="ref49">49</xref>]. The performance of each deep learning classifier was assessed with either randomly initialized embeddings or W2V-initialized embeddings.</p>
      </sec>
      <sec>
        <title>Deep Learning Models</title>
        <p>We examined 2 different deep neural network (DNN) architectures: a CNN architecture similar to a previously published model [<xref ref-type="bibr" rid="ref26">26</xref>] and a long short-term memory (LSTM) model [<xref ref-type="bibr" rid="ref50">50</xref>]. Both architectures were tested using either randomly initialized WE weights in Keras or WE initialized with the weights from the pretrained W2V.</p>
        <p>Both models had WE with 200 dimensions per word. The input layer had a dimension size slightly exceeding the maximum length of the input sequences of tokens, which were 8352 tokens for the concurrent notes and 11,000 tokens for the predictive notes. The CNN architecture consisted of an input layer; a WE layer included with a drop rate of 0.2; a convolutional layer with multiple filter sizes (3, 4, and 5) in parallel, with 200 nodes in each, ReLU activation, a stride of one, and global max-pooling; a merge tensor then a fully connected 200-node hidden layer with ReLU activation and a drop rate of 0.2; and an output layer with a single binary node with a sigmoid activation function. The LSTM architecture consisted of an input layer; a WE layer with a drop rate of 0.1; an LSTM layer with 64 nodes; both global average pooling and global max-pooling layers with a merge tensor of the 2; a fully connected 100-node hidden layer with ReLU activation and a drop rate of 0.1; and a single sigmoid binary output node.</p>
        <p>The DNN models were trained using an adaptive moment estimation gradient descent algorithm [<xref ref-type="bibr" rid="ref51">51</xref>] with a diminishing learning rate starting at 4×10<sup>−4</sup>, batch size of 32, validation split at 15%, and early stopping based on the loss function for the validation data with patience of 5.</p>
      </sec>
      <sec>
        <title>Training and Evaluation</title>
        <sec>
          <title>Detection of Concurrent Intentional Self-Harm</title>
          <p>For the automated detection of concurrent intentional self-harm ICD code assignment based on clinical text, we used the training and cross-validation data set (with index visits from 2012-2017) to identify the best performing models and hyperparameters. We then used the top 2 performing models (the DNNs) for training on the full training set and testing on the holdout test set (with index visits from 2018 to 2019), which included the 200 manually reviewed cases. The models were trained using intentional self-harm ICD codes as positive labels. However, we tested the output using both intentional self-harm ICD codes as positive labels and manually reviewed (gold standard) labels.</p>
        </sec>
        <sec>
          <title>Prediction of Future Intentional Self-Harm Events</title>
          <p>The 2 best performing models, namely, the DNNs, were used to predict future intentional self-harm events based on previous clinical notes. In the holdout test set, we used a balanced set with an equal number of intentional self-harm cases and controls with 106 cases in each. The DNN models were trained on notes preceding the first intentional self-harm visits during the 2012 to 2017 time frame and then tested on notes preceding the first intentional self-harm visits during the 2018 to 2019 time frame. Unlike the previous task, which had near-ceiling performance results with little variation, the performance of the DNNs on the predictive task varied between different runs of the same model even when using the same training and testing sets. This is due to the random initialization of weights in TensorFlow and random shuffling between epochs during training. To evaluate the performance of the different DNN architectures more precisely, we ran each model 50 times and examined the averages of the different metrics and used the Student <italic>t</italic> test (two-tailed) to determine statistical differences in performance.</p>
        </sec>
        <sec>
          <title>Metrics</title>
          <p>The performance metrics for all experiments, including area under the receiver operating characteristic (ROC) curve (AUC), were calculated in R using the caret [<xref ref-type="bibr" rid="ref52">52</xref>] and pROC [<xref ref-type="bibr" rid="ref53">53</xref>] packages. We also calculated the accuracy, precision, recall, and F1 score for all the models.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>International Classification of Diseases Code Analysis</title>
        <p>The interrater reliability during the manual review exhibited a Cohen kappa of 0.96. Using the labels from the manual review as the gold standard, the accuracy of the intentional self-harm ICD codes attributed to concurrent visits was 0.92, with a precision of 0.84 and recall of 1.0. Thus, 16 cases out of 100 that were assigned an intentional self-harm ICD code did not exhibit intentional self-harm as part of the presenting history, per the manual review. However, all but 2 of the 16 <italic>false-positives</italic> by ICD had past intentional self-harm mentioned in their clinical notes. For those 2, 1 was suspected intentional self-harm, and the other had a previous admission for suicidal ideation with possible intentional self-harm.</p>
      </sec>
      <sec>
        <title>Word Frequency Results</title>
        <p>This analysis identified keywords that were overrepresented in clinical notes concurrent with intentional self-harm events and in clinical notes before the intentional self-harm events (<xref ref-type="table" rid="table1">Table 1</xref>). For example, the words <italic>suicide</italic> and <italic>attempt</italic> top the list in concurrent notes; however, they do not rank in the top 10 words in preceding notes. Instead, the words <italic>disorder</italic> and <italic>si</italic> (the shorthand for suicidal ideation) top the list in notes preceding intentional self-harm.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The top 10 words in each group were compared with controls, along with the chi-square statistic for each.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Concurrent with ISH<sup>a,b</sup></td>
                <td colspan="2">Before ISH<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>Keyword</td>
                <td>Chi-square (<italic>df</italic>=1)</td>
                <td>Keyword</td>
                <td>Chi-square (<italic>df</italic>=1)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>suicide</td>
                <td>1.3E+5</td>
                <td>disorder</td>
                <td>1.2E+4</td>
              </tr>
              <tr valign="top">
                <td>attempt</td>
                <td>8.2E+4</td>
                <td>si<sup>d</sup></td>
                <td>8.5E+3</td>
              </tr>
              <tr valign="top">
                <td>overdose</td>
                <td>6.7E+4</td>
                <td>suicidal</td>
                <td>6.0E+3</td>
              </tr>
              <tr valign="top">
                <td>si</td>
                <td>6.5E+4</td>
                <td>mood</td>
                <td>5.8E+3</td>
              </tr>
              <tr valign="top">
                <td>disorder</td>
                <td>5.2E+4</td>
                <td>use</td>
                <td>4.7E+3</td>
              </tr>
              <tr valign="top">
                <td>suicidal</td>
                <td>5.2E+4</td>
                <td>alcohol</td>
                <td>4.6E+3</td>
              </tr>
              <tr valign="top">
                <td>psychiatry</td>
                <td>4.0E+4</td>
                <td>qhs<sup>e</sup></td>
                <td>4.5E+3</td>
              </tr>
              <tr valign="top">
                <td>iop<sup>f</sup></td>
                <td>3.6E+4</td>
                <td>safety</td>
                <td>4.2E+3</td>
              </tr>
              <tr valign="top">
                <td>interview</td>
                <td>3.5E+4</td>
                <td>interview</td>
                <td>3.9E+3</td>
              </tr>
              <tr valign="top">
                <td>mood</td>
                <td>2.9E+4</td>
                <td>cocaine</td>
                <td>3.9E+3</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Keywords from clinical notes from visits concurrent with ISH events.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>ISH: intentional self-harm.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Keywords from clinical notes from visits before the first ISH events.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>si: suicidal ideation.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>qhs: every bedtime (from Latin quaque hora somni).</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>iop: Institute of Psychiatry.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Word2vec Pretraining Results</title>
        <p>The W2V model successfully clustered words that seemed to have similar semantic contexts. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the visualization of a sample of relevant words reduced into 2 dimensions using the t-SNE algorithm. <xref ref-type="table" rid="table2">Table 2</xref> shows the top 10 words semantically similar to <italic>attempt</italic> and the top 10 words similar to <italic>ideation</italic> along with their cosine similarities. For example, the cosine similarity between <italic>attempt</italic> and <italic>suicide</italic> WE vectors was 0.730 and between <italic>ideation</italic> and <italic>suicidal</italic> was 0.872. The list also shows several misspelled words in a similar dimension space as their correctly spelled counterparts.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>A visualization of a sample of relevant words derived from the Word2vec model reduced into two dimensions using t-distributed stochastic neighbor embedding. V1=variable 1; V2=variable 2.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e17784_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Words semantically similar to the words attempt and ideation and their cosine similarity in the 200-dimension vector space as identified by the Word2vec analysis.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Term</td>
                <td>Cos sim<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">
                  <italic>attempt</italic>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>attempt</td>
                <td>1.000</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>suicide</td>
                <td>0.730</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>overdose</td>
                <td>0.696</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>osteoarthrithis</td>
                <td>0.679</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>gesture</td>
                <td>0.643</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>sucicide</td>
                <td>0.625</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>benzodiaspines</td>
                <td>0.619</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>intentional</td>
                <td>0.617</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <italic>ideation</italic>
                </td>
                <td>Cos sim<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ideation</td>
                <td>1.000</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>suicidal</td>
                <td>0.872</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>homicidal</td>
                <td>0.837</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ideations</td>
                <td>0.736</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>intent</td>
                <td>0.681</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ideaiton</td>
                <td>0.651</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>si<sup>b</sup></td>
                <td>0.648</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>sucidial</td>
                <td>0.619</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Cos sim: cosine similarity.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>si: suicidal ideation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Detection of Concurrent Intentional Self-Harm</title>
        <sec>
          <title>Training and Cross-Validation</title>
          <p><xref ref-type="table" rid="table3">Table 3</xref> shows the results of the automated detection of concurrent intentional self-harm ICD code assignment based on the training and cross-validation data set with intentional self-harm visits during the period of 2012 to 2017. The DNNs outperformed the BOW classifiers. The CNN models had the highest AUC and F1 score. The best performance overall was for the CNN with W2V WE (CNNw) with an AUC of 0.988 and an F1 score of 0.928. The CNN with randomly initialized WE (CNNr) was a close second, with significantly overlapping 95% CIs. The LSTM with randomly initialized WE (LSTMr) and the LSTM with W2V WE (LSTMw) had AUCs of 0.982 and 0.975, respectively, with F1 scores of 0.898 and 0.887, respectively.</p>
          <p>Among the BOW models, RF had the best AUC (0.961), and MLP had the best F1 score (0.862). On the basis of these results, we used 2 deep learning models for the rest of this study.</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>The metrics for training and cross-validation on the 2012 to 2017 data set.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="220"/>
              <col width="250"/>
              <col width="250"/>
              <col width="100"/>
              <col width="80"/>
              <col width="100"/>
              <thead>
                <tr valign="top">
                  <td>Model</td>
                  <td>AUC<sup>a</sup> (95% CI<sup>b</sup>)</td>
                  <td>Accuracy (95% CI)</td>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td>F1 score</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>NB<sup>c</sup></td>
                  <td>0.908 (0.882-0.934)</td>
                  <td>0.870 (0.839-0.898)</td>
                  <td>0.734</td>
                  <td>0.865</td>
                  <td>0.794</td>
                </tr>
                <tr valign="top">
                  <td>DT<sup>d</sup></td>
                  <td>0.870 (0.839-0.901)</td>
                  <td>0.865 (0.833-0.893)</td>
                  <td>0.715</td>
                  <td>0.885</td>
                  <td>0.791</td>
                </tr>
                <tr valign="top">
                  <td>RF<sup>e</sup></td>
                  <td>0.961 (0.944-0.978)</td>
                  <td>0.896 (0.867-0.921)</td>
                  <td>0.794</td>
                  <td>0.865</td>
                  <td>0.828</td>
                </tr>
                <tr valign="top">
                  <td>SVM<sup>f</sup></td>
                  <td>0.947 (0.925-0.969)</td>
                  <td>0.900 (0.872-0.924)</td>
                  <td>0.859</td>
                  <td>0.782</td>
                  <td>0.819</td>
                </tr>
                <tr valign="top">
                  <td>MLP<sup>g</sup></td>
                  <td>0.957 (0.938-0.976)</td>
                  <td>0.917 (0.890-0.939)</td>
                  <td>0.828</td>
                  <td>0.897</td>
                  <td>0.862</td>
                </tr>
                <tr valign="top">
                  <td>CNNr<sup>h</sup></td>
                  <td>0.984 (0.972-0.995)</td>
                  <td>0.946 (0.924-0.964)</td>
                  <td>0.938</td>
                  <td>0.872</td>
                  <td>0.904</td>
                </tr>
                <tr valign="top">
                  <td>CNNw<sup>i</sup></td>
                  <td>0.988 (0.977-0.999)</td>
                  <td>0.959 (0.939-0.974)</td>
                  <td>0.947</td>
                  <td>0.910</td>
                  <td>0.928</td>
                </tr>
                <tr valign="top">
                  <td>LSTMr<sup>j</sup></td>
                  <td>0.982 (0.972-0.992)</td>
                  <td>0.943 (0.920-0.961)</td>
                  <td>0.919</td>
                  <td>0.878</td>
                  <td>0.898</td>
                </tr>
                <tr valign="top">
                  <td>LSTMw<sup>k</sup></td>
                  <td>0.975 (0.960-0.990)</td>
                  <td>0.937 (0.913-0.956)</td>
                  <td>0.918</td>
                  <td>0.859</td>
                  <td>0.887</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>CI: 95% confidence intervals for the AUC.</p>
              </fn>
              <fn id="table3fn3">
                <p><sup>c</sup>NB: naïve Bayes.</p>
              </fn>
              <fn id="table3fn4">
                <p><sup>d</sup>DT: decision tree.</p>
              </fn>
              <fn id="table3fn5">
                <p><sup>e</sup>RF: random forest.</p>
              </fn>
              <fn id="table3fn6">
                <p><sup>f</sup>SVM: support vector machine.</p>
              </fn>
              <fn id="table3fn7">
                <p><sup>g</sup>MLP: multilayer perceptron.</p>
              </fn>
              <fn id="table3fn8">
                <p><sup>h</sup>CNNr: convolutional neural network with randomly initialized word embeddings.</p>
              </fn>
              <fn id="table3fn9">
                <p><sup>i</sup>CNNw: convolutional neural network with Word2vec word embeddings.</p>
              </fn>
              <fn id="table3fn10">
                <p><sup>j</sup>LSTMr: long short-term memory with randomly initialized word embeddings.</p>
              </fn>
              <fn id="table3fn11">
                <p><sup>k</sup>LSTMw: long short-term memory with Word2vec word embeddings.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Testing of Concurrent Intentional Self-Harm Labels</title>
          <p>Training the models on the full 2012 to 2017 data set then testing on the holdout (2018-2019) test set yielded even better performance than in the above cross-validation for detecting concurrent intentional self-harm ICD labels (<xref ref-type="table" rid="table4">Table 4</xref>). The best performing model was the CNNr with an AUC of 0.999 and an F1 score of 0.985. A plot of the training history for this task shows that the model converges smoothly to a minimum loss value on both training and validation (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). There was no advantage to adding the pretrained W2V WE, that is, the CNNw, when testing on the holdout set. The CNNs slightly outperformed the LSTMs, but the results in all models were close to ceiling, making it difficult to point out the significance of these differences. As expected, as the models were trained on ICD labels, they performed better in predicting concurrent ICD labels than they did with predicting the gold standard labels (<xref rid="figure2" ref-type="fig">Figure 2</xref>). Of note, the recall remained very high when testing on the gold standard labels compared with the ICD labels, whereas the precision suffered slightly, reflecting the precision achieved during the intentional self-harm ICD code analysis.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>The metrics for training on the 2012 to 2017 data set and testing on the 2018 to 2019 holdout test set using both International Classification of Diseases labels and gold standard labels.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="130"/>
              <col width="0"/>
              <col width="240"/>
              <col width="0"/>
              <col width="250"/>
              <col width="0"/>
              <col width="130"/>
              <col width="0"/>
              <col width="100"/>
              <col width="0"/>
              <col width="120"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Model</td>
                  <td colspan="2">AUC<sup>a</sup> (95% CI<sup>b</sup>)</td>
                  <td colspan="2">Accuracy (95% CI)</td>
                  <td colspan="2">Precision</td>
                  <td colspan="2">Recall</td>
                  <td>F1 score</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="12">
                    <bold>ICD<sup>c</sup> labels</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>CNNr<sup>d</sup></td>
                  <td colspan="2">0.999 (0.998-1.000)</td>
                  <td colspan="2">0.985 (0.957-0.997)</td>
                  <td colspan="2">0.980</td>
                  <td colspan="2">0.990</td>
                  <td colspan="2">0.985</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>CNNw<sup>e</sup></td>
                  <td colspan="2">0.998 (0.996-1.000)</td>
                  <td colspan="2">0.970 (0.936-0.989)</td>
                  <td colspan="2">0.980</td>
                  <td colspan="2">0.960</td>
                  <td colspan="2">0.970</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>LSTMr<sup>f</sup></td>
                  <td colspan="2">0.997 (0.991-1.000)</td>
                  <td colspan="2">0.980 (0.950-0.995)</td>
                  <td colspan="2">0.990</td>
                  <td colspan="2">0.970</td>
                  <td colspan="2">0.980</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>LSTMw<sup>g</sup></td>
                  <td colspan="2">0.997 (0.994-1.000)</td>
                  <td colspan="2">0.960 (0.923-0.983)</td>
                  <td colspan="2">0.989</td>
                  <td colspan="2">0.930</td>
                  <td colspan="2">0.959</td>
                </tr>
                <tr valign="top">
                  <td colspan="12">
                    <bold>Gold standard labels</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>CNNr<sup>d</sup></td>
                  <td colspan="2">0.981 (0.966-0.997)</td>
                  <td colspan="2">0.915 (0.867-0.950)</td>
                  <td colspan="2">0.832</td>
                  <td colspan="2">1.000</td>
                  <td colspan="2">0.908</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>CNNw<sup>e</sup></td>
                  <td colspan="2">0.981 (0.965-0.997)</td>
                  <td colspan="2">0.920 (0.873-0.954)</td>
                  <td colspan="2">0.847</td>
                  <td colspan="2">0.988</td>
                  <td colspan="2">0.912</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>LSTMr<sup>f</sup></td>
                  <td colspan="2">0.968 (0.946-0.989)</td>
                  <td colspan="2">0.910 (0.861-0.946)</td>
                  <td colspan="2">0.837</td>
                  <td colspan="2">0.976</td>
                  <td colspan="2">0.901</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>LSTMw<sup>g</sup></td>
                  <td colspan="2">0.967 (0.945-0.989)</td>
                  <td colspan="2">0.920 (0.873-0.954)</td>
                  <td colspan="2">0.862</td>
                  <td colspan="2">0.964</td>
                  <td colspan="2">0.910</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table4fn1">
                <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
              </fn>
              <fn id="table4fn2">
                <p><sup>b</sup>CI: 95% confidence intervals for the AUC.</p>
              </fn>
              <fn id="table4fn3">
                <p><sup>c</sup>ICD: International Classification of Diseases.</p>
              </fn>
              <fn id="table4fn4">
                <p><sup>d</sup>CNNr: convolutional neural network with randomly initialized word embeddings.</p>
              </fn>
              <fn id="table4fn5">
                <p><sup>e</sup>CNNw: convolutional neural network with Word2vec word embeddings.</p>
              </fn>
              <fn id="table4fn6">
                <p><sup>f</sup>LSTMr: long short-term memory with randomly initialized word embeddings.</p>
              </fn>
              <fn id="table4fn7">
                <p><sup>g</sup>LSTMw: long short-term memory with Word2vec word embeddings.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>The area under the receiver operating characteristic curve for training on the 2012 to 2017 data set and testing on the holdout test set (2018-2019) using (1) International Classification of Diseases labels and (2) gold standard labels. AUC: area under the receiver operating characteristic curve; ICD: International Classification of Diseases; CNNr: convolutional neural network with randomly initialized word embeddings; CNNw: convolutional neural network with Word2vec word embeddings; LSTMr: long short-term memory with randomly initialized word embedding; LSTMw: long short-term memory with Word2vec word embedding.</p>
            </caption>
            <graphic xlink:href="medinform_v8i7e17784_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Prediction of Future Intentional Self-Harm Events</title>
        <p>The results for the prediction of future intentional self-harm events based on previous clinical notes are shown in <xref ref-type="table" rid="table5">Table 5</xref>. These values are the means of the different metrics after 50 training and testing cycles for each model. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the differences in performance between the different models. The CNNr model had the best performance, with a mean AUC of 0.882 and a standard deviation of 0.006 (<italic>P</italic>&#60;.001) compared with CNNw, which in turn outperformed the LSTM models (<italic>P</italic>&#60;.001). There was no significant difference between LSTMr and LSTMw. The variance in performance was notably wider in the LSTM models than in the CNN models. <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> shows the ROC curves for each of the models highlighting the mean AUC. Although pretraining with W2V did not add value in terms of performance, it did reduce the number of epochs needed during training by an average of 32% for the CNN and 12% for the LSTM.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>The metrics for models trained on notes preceding the first intentional self-harm visits in patients presenting during the 2012 to 2017 time frame and tested on notes preceding the first intentional self-harm visits in patients presenting during the 2018 to 2019 time frame.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="130"/>
            <col width="260"/>
            <col width="260"/>
            <col width="130"/>
            <col width="100"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>AUC<sup>a</sup> (95% CI<sup>b</sup>)</td>
                <td>Accuracy (95% CI)</td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F1 score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>CNNr<sup>c</sup></td>
                <td>0.882 (0.871-0.891)</td>
                <td>0.792 (0.774-0.807)</td>
                <td>0.863</td>
                <td>0.694</td>
                <td>0.769</td>
              </tr>
              <tr valign="top">
                <td>CNNw<sup>d</sup></td>
                <td>0.869 (0.858-0.879)</td>
                <td>0.782 (0.766-0.792)</td>
                <td>0.860</td>
                <td>0.673</td>
                <td>0.755</td>
              </tr>
              <tr valign="top">
                <td>LSTMr<sup>e</sup></td>
                <td>0.850 (0.827-0.877)</td>
                <td>0.758 (0.729-0.788)</td>
                <td>0.830</td>
                <td>0.656</td>
                <td>0.729</td>
              </tr>
              <tr valign="top">
                <td>LSTMw<sup>f</sup></td>
                <td>0.846 (0.819-0.871)</td>
                <td>0.750 (0.717-0.778)</td>
                <td>0.822</td>
                <td>0.644</td>
                <td>0.720</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>CI: 95% confidence intervals for the AUC.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>CNNr: convolutional neural network with randomly initialized word embeddings.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>CNNw: convolutional neural network with Word2vec word embeddings.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup>LSTMr: long short-term memory with randomly initialized word embeddings.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup>LSTMw: long short-term memory with Word2vec word embeddings.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>The mean area under the receiver operating characteristic curve and 95% CI for models trained on notes preceding the first intentional self-harm visits in patients presenting during the 2012 to 2017 time frame and tested on notes preceding the first intentional self-harm visits in patients presenting during the 2018 to 2019 time frame. The differences in performance were all significant (<italic>P</italic>&#60;.001) except for the difference between the LSTMr and LSTMw. AUC: area under the receiver operating characteristic curve; CNNr: convolutional neural network with randomly initialized word embeddings; CNNw: convolutional neural network with Word2vec word embeddings; LSTMr: long short-term memory with randomly initialized word embedding; LSTMw: long short-term memory with Word2vec word embedding.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e17784_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Semantic Differences</title>
        <p>The word frequency analyses identified keywords that were overrepresented in clinical notes associated with intentional self-harm visits. As noted in <xref ref-type="table" rid="table1">Table 1</xref>, words such as <italic>attempt</italic> and <italic>overdose</italic> were highly overrepresented in clinical notes concurrent with intentional self-harm events compared with controls. Conversely, suicidal ideation (as represented by the shorthand word <italic>si</italic>) was frequently present in preintentional self-harm notes. This is consistent with the literature on ideation, which is a prominent risk factor for suicide attempts and completions [<xref ref-type="bibr" rid="ref54">54</xref>].</p>
        <p>The W2V pretraining on our full data set of clinical notes successfully clustered relevant words together. It also demonstrated word similarity for some of the significant words identified above. For example, the words <italic>attempt</italic>, <italic>suicide</italic>, and <italic>overdose</italic> were closely linked with high cosine similarity. This model was also useful in clustering misspelled words with their correctly spelled counterparts, which may help reduce noise due to misspelling in the clinical notes.</p>
      </sec>
      <sec>
        <title>Detection of Intentional Self-Harm Events</title>
        <p>The deep learning models outperformed BOW models in identifying intentional self-harm in training and testing using the 2012 to 2017 data set. Given this outcome, we trained the deep learning models on the full 2012 to 2017 data set and then used the 2018 to 2019 data set as a holdout test set. This temporal division of the data is intended to replicate a real-world scenario where models could be trained on historical data to identify intentional self-harm in new records. The results show that we can accurately detect intentional self-harm events in concurrent clinical notes with intentional self-harm ICD codes. More specifically, we showed that a model trained on aggregated clinical text associated with a given intentional self-harm visit may be used to identify concurrent intentional self-harm events even if ICD codes were not yet provided or assigned. In other words, clinical text alone is useful in accurately identifying the intentional self-harm phenotype.</p>
        <p>Although there is limited literature on the performance of NLP and machine learning approaches for the phenotyping of intentional self-harm, our DNN classifiers with precisions up to 99% for concurrent notes with intentional self-harm ICD codes and up to 86% for gold standard intentional self-harm events compare favorably with previous reports, especially when considering that the models were trained on ICD codes as labels. Using a hybrid machine learning and rule-based NLP approach, Fernandes et al [<xref ref-type="bibr" rid="ref19">19</xref>] achieved a precision of 82.8% for identifying suicide attempts. Another study comparing the accuracy of ICD codes and NLP-extracted concepts for suicidality achieved a precision of 60% using NLP alone and 97% using both ICD-9 codes and NLP; however, this study did not differentiate between suicidal ideation and intentional self-harm [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
        <p>Although the CNN-based models seemed to slightly outperform the LSTM-based models on the phenotyping task, it is difficult to show a significant advantage to using either model or the advantage of pretraining with W2V due to the near-ceiling performance of all the DNNs on this task and the relatively small data set.</p>
        <p>Nonetheless, a DNN model trained using this method may be useful for surveillance purposes and could well supplement surveillance using ICD codes. Training such a model using intentional self-harm ICD codes as positive labels is dependent on reliable assignment of ICD codes. Fortunately, ICD codes for intentional self-harm at our institution were accurate, as shown by the manual review of charts, notwithstanding the limitation of a relatively high false-positive rate. Finally, accurate phenotyping of the intentional self-harm events paves the way for future directions in identifying other phenotypes, for example, those with suicidal ideation alone versus intentional self-harm or not intentional self-harm, which may or may not have accurate ICD codes. Such precise or deep phenotyping is an important step toward predicting the risk of mortality, given the availability of mortality data.</p>
      </sec>
      <sec>
        <title>Prediction of Future Intentional Self-Harm Based on Clinical Text</title>
        <p>The results also show that aggregated clinical notes from visits between 1 and 6 months before the index visit predicted future intentional self-harm events with an AUC of 0.882 for the <italic>best performing</italic> CNN model. These results compare favorably with the literature on predictive models for suicide attempts. Using a complex combination of structured EHR data (including demographics, diagnostic codes, and census-based socioeconomic status) and medication data extracted via NLP, Walsh et al [<xref ref-type="bibr" rid="ref20">20</xref>] achieved a maximum AUC of 0.84. Moreover, this AUC was based only on 7-day-old data. The AUC dropped gradually to 0.81 as the predictive window widened to 6 months before the index visit.</p>
        <p>When comparing the performance between the 2 DNN architectures, we noted a consistent and statistically significant performance advantage of the 2 CNN models over the LSTM-based ones (<xref rid="figure3" ref-type="fig">Figure 3</xref>). Moreover, the LSTM had a relatively high variance and inconsistent performance over the 50 training runs, as can be noted from the CIs. We also noted a higher computational cost for the LSTM over the CNN (almost twice the time needed for training per epoch). In addition to the higher computational cost, recurrent neural networks show only a minor advantage in generic text classification tasks [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. At least with a small data set like ours, the CNNs were found to converge more smoothly and provide better performance.</p>
        <p>While the W2V pretraining clustered similar words, initializing the WE layer with W2V weights did not add any value to either of the predictive models. Although CNNr (AUC=0.882) performed only slightly better than CNNw (AUC=0.869), the difference was statistically significant. However, there was no difference between the LSTMr and LSTMw. These results were unexpected given the advantages of pretrained WE in picking up misspellings and word similarities and highlight the need to examine newer, more complex language models such as Google’s (Alphabet Inc) Bidirectional Encoder Representations from Transformers [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>Regardless of the model architecture, these results are promising. Such predictive models may be useful in stratifying hospitalized patients into risk categories, which may aid in discharge planning. Using technology (telephone, emails, or text messages) for follow-up in the postdischarge period has been shown to reduce risk of future suicide attempts [<xref ref-type="bibr" rid="ref57">57</xref>]. Furthermore, patients could be prophylactically assigned a social worker; be directed to collaborative primary care clinics with access to mental health services; or receive mental health referrals, telehealth appointments, or home health visits [<xref ref-type="bibr" rid="ref58">58</xref>]. Adequate refinement of a predictive model may even allow for stratification of patients to a level of care necessary post discharge, beyond simple binary risk categorization.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>To identify patients with intentional self-harm during a given visit, we trained the models on ICD codes. Therefore, they can only perform as well as the ICD code designation. As mentioned earlier, during the manual labeling process, several patients had a past medical history of intentional self-harm rather than suicide attempt or self-harm as part of the presenting chief complaint or diagnosis. A possible solution would be to train models to introduce multiple labels that include current and past intentional self-harm through manual review. However, this would require a manual review of several hundreds of charts, which was beyond the scope of this initial pilot work.</p>
        <p>Moreover, although we can clearly identify intentional self-harm, this still does not specify <italic>intent to die</italic>. This highlights the need for data on fatalities due to suicide. There are multiple forms of self-injury (eg, firearms, sharp objects, jumping from a high place) with ICD codes that are not accompanied by the classification of intent to harm oneself. Therefore, in these instances of unknown intent, self-injury may reflect a multitude of motives: communicating distress, suicidal gestures with low lethality, nonsuicidal self-injury (NSSI), or fatality [<xref ref-type="bibr" rid="ref59">59</xref>]. Existing literature predicting NSSI behaviors yields 3 notable risk factor categories: history of NSSI, cluster B personality, and hopelessness [<xref ref-type="bibr" rid="ref60">60</xref>]. Identifying NSSI can be of significant prognostic value and has not been distinguished from intent to die in this study.</p>
        <p>Another limitation of this study is that our model currently only addresses features within clinical texts. Other clinical information could be added to the model, such as associated demographics, comorbidities, and risk factors (eg, codes for depression or substance use). Moreover, with respect to suicide prediction, EHR data alone may not provide a full picture. Ideally, our data should be linked with the statewide cause of death data, which should yield an improved predictive power.</p>
        <p>Although deep learning models are more powerful, they are less interpretable than some of the BOW models. For example, when using an RF model, the results of a variable importance analysis may yield insight into significant words. In fact, it may be beneficial to use both types of predictive models in mental health applications. This would leverage the power of deep learning models as well as the advantages of interpretable models. Future work should also include the exploration of attention-based deep learning models with some insight into explainability [<xref ref-type="bibr" rid="ref61">61</xref>], which may address the utility of these models in real-world clinical decision support and adoption by clinicians.</p>
        <p>Finally, the results presented here are based on data from a single EHR system at 1 academic medical center, making it difficult to draw generalizations about the high level of performance of our models in other environments. Future work should include collaboration with other institutions to ascertain the performance of these models in other environments.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Most of the models showed relatively good performance when detecting intentional self-harm events in concurrent clinical notes, that is, the phenotyping task. This is likely due to a strong signal within concurrent notes and is associated with a high fidelity of ICD code attribution for intentional self-harm, at least at our institution. When applied to the prediction of a future occurrence of intentional self-harm code assignment in a patient chart based on previous clinical notes, the AUC dropped to 0.882 with a modest recall and precision. Nevertheless, our results are competitive with the results from other models reported in the literature. Improving the precision of these algorithms could lead to better follow-up and preventative care by mental health professionals for patients who are at risk for future suicide attempts.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>The full list of note types and their relative frequencies in the data set.</p>
        <media xlink:href="medinform_v8i7e17784_app1.docx" xlink:title="DOCX File , 19 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>A plot of the convolutional neural network model’s training history for the phenotyping task. The learning curve shows that the model converges smoothly to a minimum loss value on both training and validation sets using an Adam optimizer.</p>
        <media xlink:href="medinform_v8i7e17784_app2.png" xlink:title="PNG File , 37 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Plots of the receiver operating characteristic curves for the 50 training and testing runs for all the models highlighting the mean area under the receiver operating characteristic curve for each model.</p>
        <media xlink:href="medinform_v8i7e17784_app3.png" xlink:title="PNG File , 235 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BOW</term>
          <def>
            <p>bag-of-words</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CNNr</term>
          <def>
            <p>CNN with randomly initialized word embedding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CNNw</term>
          <def>
            <p>CNN with Word2vec word embedding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">DNN</term>
          <def>
            <p>deep neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ED</term>
          <def>
            <p>emergency department</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">H&#38;P</term>
          <def>
            <p>history and physical</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">IRB</term>
          <def>
            <p>institutional review board</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">LSTMr</term>
          <def>
            <p>LSTM with randomly initialized word embedding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">LSTMw</term>
          <def>
            <p>LSTM with Word2vec word embedding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">MLP</term>
          <def>
            <p>multilayer perceptron</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">MUSC</term>
          <def>
            <p>Medical University of South Carolina</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">NHSR</term>
          <def>
            <p>National Health Statistics Report</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">NSSI</term>
          <def>
            <p>nonsuicidal self-injury</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">RDW</term>
          <def>
            <p>research data warehouse</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb22">ReLU</term>
          <def>
            <p>rectified linear unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb23">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb24">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb25">SVM</term>
          <def>
            <p>support vector machines</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb26">t-SNE</term>
          <def>
            <p>t-distributed stochastic neighbor embedding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb27">W2V</term>
          <def>
            <p>Word2vec</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb28">WE</term>
          <def>
            <p>word embedding</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This project was supported, in part, by the National Center for Advancing Translational Sciences of the National Institutes of Health under grant number UL1 TR001450, the National Institute on Drug Abuse (K23 DA045766 to JD), and the National Institute of Mental Health (K23 MH118482 to BB). The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>JD is co-owner of the Behavioral Activation Tech LLC, a company that develops technology-based treatments for depression.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kochanek</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Arias</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Mortality in the United States, 2016</article-title>
          <source>NCHS Data Brief</source>
          <year>2017</year>
          <month>12</month>
          <issue>293</issue>
          <fpage>1</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cdc.gov/nchs/data/databriefs/db293.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">29319473</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Fatal Injury Data</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2019-12-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/injury/wisqars/fatal.html">https://www.cdc.gov/injury/wisqars/fatal.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <article-title>Preventing Suicide</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2019-12-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/violenceprevention/suicide/fastfact.html">https://www.cdc.gov/violenceprevention/suicide/fastfact.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gvion</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Levi-Belz</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Serious suicide attempts: systematic review of psychological risk factors</article-title>
          <source>Front Psychiatry</source>
          <year>2018</year>
          <volume>9</volume>
          <fpage>56</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fpsyt.2018.00056"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyt.2018.00056</pub-id>
          <pub-id pub-id-type="medline">29563886</pub-id>
          <pub-id pub-id-type="pmcid">PMC5845877</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Franklin</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeiro</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Bentley</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Kleiman</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Musacchio</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Jaroszewski</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Nock</surname>
              <given-names>MK</given-names>
            </name>
          </person-group>
          <article-title>Risk factors for suicidal thoughts and behaviors: a meta-analysis of 50 years of research</article-title>
          <source>Psychol Bull</source>
          <year>2017</year>
          <month>02</month>
          <volume>143</volume>
          <issue>2</issue>
          <fpage>187</fpage>
          <lpage>232</lpage>
          <pub-id pub-id-type="doi">10.1037/bul0000084</pub-id>
          <pub-id pub-id-type="medline">27841450</pub-id>
          <pub-id pub-id-type="pii">2016-54856-001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatti</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Meader</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Stockton</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Kapur</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kendall</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Predicting suicide following self-harm: systematic review of risk factors and risk scales</article-title>
          <source>Br J Psychiatry</source>
          <year>2016</year>
          <month>10</month>
          <volume>209</volume>
          <issue>4</issue>
          <fpage>277</fpage>
          <lpage>83</lpage>
          <pub-id pub-id-type="doi">10.1192/bjp.bp.115.170050</pub-id>
          <pub-id pub-id-type="medline">27340111</pub-id>
          <pub-id pub-id-type="pii">S000712500024511X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jacobs</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Baldessarini</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Conwell</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fawcett</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Horton</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Meltzer</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pfeffer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Assessment and treatment of patients with suicidal behaviors</article-title>
          <source>Practice Guideline for the Assessment and Treatment of Patients with Suicidal Behaviors</source>
          <year>2010</year>
          <access-date>2020-06-10</access-date>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>American Psychiatric Association Steering Committee on Practice Guidelines</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://psychiatryonline.org/pb/assets/raw/sitewide/practice_guidelines/guidelines/suicide.pdf">https://psychiatryonline.org/pb/assets/raw/sitewide/practice_guidelines/guidelines/suicide.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="book">
          <source>Surveillance Report 2016 – Self-Harm in Over 8s: Short-Term Management and Prevention of Recurrence (2004) NICE Guideline CG16 and Self-Harm in Over 8s: Long Term Management (2011) NICE Guideline CG133</source>
          <year>2016</year>
          <publisher-loc>London</publisher-loc>
          <publisher-name>National Institute for Health and Care Excellence (UK)</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Larkin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Di Blasi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Arensman</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Risk factors for repetition of self-harm: a systematic review of prospective hospital-based studies</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e84282</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0084282"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0084282</pub-id>
          <pub-id pub-id-type="medline">24465400</pub-id>
          <pub-id pub-id-type="pii">PONE-D-12-39886</pub-id>
          <pub-id pub-id-type="pmcid">PMC3896350</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bolton</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Gunnell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Turecki</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Suicide risk assessment and intervention in people with mental illness</article-title>
          <source>Br Med J</source>
          <year>2015</year>
          <month>11</month>
          <day>9</day>
          <volume>351</volume>
          <fpage>h4978</fpage>
          <pub-id pub-id-type="doi">10.1136/bmj.h4978</pub-id>
          <pub-id pub-id-type="medline">26552947</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Runeson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Odeberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pettersson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Edbom</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Adamsson</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Waern</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Instruments for the assessment of suicide risk: a systematic review evaluating the certainty of the evidence</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <volume>12</volume>
          <issue>7</issue>
          <fpage>e0180292</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0180292"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0180292</pub-id>
          <pub-id pub-id-type="medline">28723978</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-38967</pub-id>
          <pub-id pub-id-type="pmcid">PMC5517300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Poulin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shiner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Vepstas</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Young-Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Goertzel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Flashman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>McAllister</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Predicting the risk of suicide by analyzing the text of clinical notes</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e85733</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0085733"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0085733</pub-id>
          <pub-id pub-id-type="medline">24489669</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-32997</pub-id>
          <pub-id pub-id-type="pmcid">PMC3904866</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Ammerman</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobucci</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The use of machine learning in the study of suicidal and non-suicidal self-injurious thoughts and behaviors: a systematic review</article-title>
          <source>J Affect Disord</source>
          <year>2019</year>
          <month>02</month>
          <day>15</day>
          <volume>245</volume>
          <fpage>869</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jad.2018.11.073</pub-id>
          <pub-id pub-id-type="medline">30699872</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(18)31750-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kessler</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Petukhova</surname>
              <given-names>MV</given-names>
            </name>
            <name name-style="western">
              <surname>Bliese</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bossarte</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Bromet</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fullerton</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Gilman</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Ivany</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewandowski-Romps</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Millikan Bell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Naifeh</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Nock</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Reis</surname>
              <given-names>BY</given-names>
            </name>
            <name name-style="western">
              <surname>Rosellini</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sampson</surname>
              <given-names>NA</given-names>
            </name>
            <name name-style="western">
              <surname>Zaslavsky</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Ursano</surname>
              <given-names>RJ</given-names>
            </name>
            <collab>Army STARRS Collaborators</collab>
          </person-group>
          <article-title>Predicting suicides after outpatient mental health visits in the army study to assess risk and resilience in servicemembers (army STARRS)</article-title>
          <source>Mol Psychiatry</source>
          <year>2017</year>
          <month>04</month>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>544</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27431294"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/mp.2016.110</pub-id>
          <pub-id pub-id-type="medline">27431294</pub-id>
          <pub-id pub-id-type="pii">mp2016110</pub-id>
          <pub-id pub-id-type="pmcid">PMC5247428</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Gelaye</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Finan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Avillach</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Smoller</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Screening pregnant women for suicidal behavior in electronic medical records: diagnostic codes vs clinical notes processed by natural language processing</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2018</year>
          <month>05</month>
          <day>29</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>30</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-018-0617-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-018-0617-7</pub-id>
          <pub-id pub-id-type="medline">29843698</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-018-0617-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5975502</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haerian</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salmasian</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Methods for identifying suicide or suicidal ideation in EHRs</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2012</year>
          <volume>2012</volume>
          <fpage>1244</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23304402"/>
          </comment>
          <pub-id pub-id-type="medline">23304402</pub-id>
          <pub-id pub-id-type="pmcid">PMC3540459</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Castro</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Roberson</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Snapper</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Perlis</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Improving prediction of suicide and accidental death after discharge from general hospitals with natural language processing</article-title>
          <source>JAMA Psychiatry</source>
          <year>2016</year>
          <month>10</month>
          <day>1</day>
          <volume>73</volume>
          <issue>10</issue>
          <fpage>1064</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1001/jamapsychiatry.2016.2172</pub-id>
          <pub-id pub-id-type="medline">27626235</pub-id>
          <pub-id pub-id-type="pii">2548276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Downs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Holden</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kikoler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dutta</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Detection of suicidality in adolescents with autism spectrum disorders: developing a natural language processing approach for use in electronic health records</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2017</year>
          <volume>2017</volume>
          <fpage>641</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29854129"/>
          </comment>
          <pub-id pub-id-type="medline">29854129</pub-id>
          <pub-id pub-id-type="pmcid">PMC5977628</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernandes</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Dutta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sanyal</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chandran</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Identifying suicide ideation and suicidal attempts in a psychiatric clinical research database using natural language processing</article-title>
          <source>Sci Rep</source>
          <year>2018</year>
          <month>05</month>
          <day>9</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>7426</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/s41598-018-25773-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-018-25773-2</pub-id>
          <pub-id pub-id-type="medline">29743531</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-018-25773-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC5943451</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walsh</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeiro</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Franklin</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Predicting risk of suicide attempts over time through machine learning</article-title>
          <source>Clin Psychol Sci</source>
          <year>2017</year>
          <month>04</month>
          <day>11</day>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>457</fpage>
          <lpage>469</lpage>
          <pub-id pub-id-type="doi">10.1177/2167702617691560</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Delgado-Gomez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Baca-Garcia</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Aguado</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Courtet</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez-Castroman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Computerized adaptive test vs decision trees: development of a support decision system to identify suicidal behavior</article-title>
          <source>J Affect Disord</source>
          <year>2016</year>
          <month>12</month>
          <volume>206</volume>
          <fpage>204</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jad.2016.07.032</pub-id>
          <pub-id pub-id-type="medline">27475891</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(16)30489-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Metzger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tvardik</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gicquel</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Bouvry</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Poulet</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Potinet-Pagliaroli</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Use of emergency department electronic medical records for automated epidemiological surveillance of suicide attempts: a French pilot study</article-title>
          <source>Int J Methods Psychiatr Res</source>
          <year>2017</year>
          <month>06</month>
          <volume>26</volume>
          <issue>2</issue>
          <fpage>e1522</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27634457"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/mpr.1522</pub-id>
          <pub-id pub-id-type="medline">27634457</pub-id>
          <pub-id pub-id-type="pmcid">PMC6877202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lopez-Castroman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Perez-Rodriguez</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Jaussent</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Alegria</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Artes-Rodriguez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Freed</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Guillaume</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jollant</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Leiva-Murillo</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Malafosse</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oquendo</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>de Prado-Cumplido</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Saiz-Ruiz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Baca-Garcia</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Courtet</surname>
              <given-names>P</given-names>
            </name>
            <collab>European Research Consortium for Suicide (EURECA)</collab>
          </person-group>
          <article-title>Distinguishing the relevant features of frequent suicide attempters</article-title>
          <source>J Psychiatr Res</source>
          <year>2011</year>
          <month>05</month>
          <volume>45</volume>
          <issue>5</issue>
          <fpage>619</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jpsychires.2010.09.017</pub-id>
          <pub-id pub-id-type="medline">21055768</pub-id>
          <pub-id pub-id-type="pii">S0022-3956(10)00284-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Waternaux</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Oquendo</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Malone</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Brodsky</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Haas</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Currier</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Classification trees distinguish suicide attempters in major psychiatric disorders: a model of clinical decision making</article-title>
          <source>J Clin Psychiatry</source>
          <year>2008</year>
          <month>01</month>
          <volume>69</volume>
          <issue>1</issue>
          <fpage>23</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18312034"/>
          </comment>
          <pub-id pub-id-type="doi">10.4088/jcp.v69n0104</pub-id>
          <pub-id pub-id-type="medline">18312034</pub-id>
          <pub-id pub-id-type="pmcid">PMC3773877</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobs</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Marques</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Oates</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Kamen</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Obeid</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Word2Vec inversion and traditional text classifiers for phenotyping lupus</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>08</month>
          <day>22</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>126</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0518-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0518-1</pub-id>
          <pub-id pub-id-type="medline">28830409</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0518-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5568290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obeid</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Weeda</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Matuskowitz</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gagnon</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Crawford</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Frey</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>Automated detection of altered mental status in emergency department clinical notes: a deep learning approach</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>08</month>
          <day>19</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>164</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0894-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0894-9</pub-id>
          <pub-id pub-id-type="medline">31426779</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0894-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC6701023</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>LeCun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>05</month>
          <day>28</day>
          <volume>521</volume>
          <issue>7553</issue>
          <fpage>436</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
          <pub-id pub-id-type="medline">26017442</pub-id>
          <pub-id pub-id-type="pii">nature14539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <source>arXiv</source>
          <year>2013</year>
          <month>01</month>
          <day>16</day>
          <access-date>2018-11-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1301.3781.pdf">https://arxiv.org/pdf/1301.3781.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <month>05</month>
          <day>24</day>
          <access-date>2020-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/N19-1423.pdf">https://www.aclweb.org/anthology/N19-1423.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Banerjee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Madhavan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Goldman</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Intelligent word embeddings of free-text radiology reports</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2017</year>
          <volume>2017</volume>
          <fpage>411</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29854105"/>
          </comment>
          <pub-id pub-id-type="medline">29854105</pub-id>
          <pub-id pub-id-type="pmcid">PMC5977573</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shing</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zirikly</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Friedenberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Daumé</surname>
              <given-names>H</given-names>
              <suffix>III</suffix>
            </name>
            <name name-style="western">
              <surname>Resnik</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Expert, Crowdsourced, and Machine Assessment of Suicide Risk via Online Postings</article-title>
          <source>Proceedings of the Fifth Workshop on Computational Linguistics and Clinical Psychology: From Keyboard to Clinic</source>
          <year>2018</year>
          <conf-name>CLPsych'18</conf-name>
          <conf-date>June 5, 2018</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w18-0603</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Extracting psychiatric stressors for suicide from social media using deep learning</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2018</year>
          <month>07</month>
          <day>23</day>
          <volume>18</volume>
          <issue>Suppl 2</issue>
          <fpage>43</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-018-0632-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-018-0632-8</pub-id>
          <pub-id pub-id-type="medline">30066665</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-018-0632-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6069295</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>R: A Language and Environment for Statistical Computing</article-title>
          <source>The R Project</source>
          <year>2019</year>
          <access-date>2019-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.r-project.org/">https://www.r-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <source>Epic</source>
          <access-date>2019-06-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.epic.com/">https://www.epic.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obeid</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Beskow</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Rape</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gouripeddi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Cimino</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Embi</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Marnocha</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Buse</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>A survey of practices for the use of electronic health records to support research recruitment</article-title>
          <source>J Clin Transl Sci</source>
          <year>2017</year>
          <month>08</month>
          <volume>1</volume>
          <issue>4</issue>
          <fpage>246</fpage>
          <lpage>52</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29657859"/>
          </comment>
          <pub-id pub-id-type="doi">10.1017/cts.2017.301</pub-id>
          <pub-id pub-id-type="medline">29657859</pub-id>
          <pub-id pub-id-type="pii">00301</pub-id>
          <pub-id pub-id-type="pmcid">PMC5890320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hedegaard</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schoenbaum</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Claassen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Crosby</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Proescholdbell</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Issues in developing a surveillance case definition for nonfatal suicide attempt and intentional self-harm using international classification of diseases, tenth revision, clinical modification (ICD-10-CM) coded data</article-title>
          <source>Natl Health Stat Report</source>
          <year>2018</year>
          <month>02</month>
          <issue>108</issue>
          <fpage>1</fpage>
          <lpage>19</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cdc.gov/nchs/data/nhsr/nhsr108.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">29616901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Imai</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stuart</surname>
              <given-names>EA</given-names>
            </name>
          </person-group>
          <article-title>Matching as nonparametric preprocessing for reducing model dependence in parametric causal inference</article-title>
          <source>Polit Anal</source>
          <year>2007</year>
          <volume>15</volume>
          <issue>3</issue>
          <fpage>199</fpage>
          <lpage>236</lpage>
          <pub-id pub-id-type="doi">10.1093/pan/mpl013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Thielke</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Payne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Conde</surname>
              <given-names>JG</given-names>
            </name>
          </person-group>
          <article-title>Research electronic data capture (REDCap)—a metadata-driven methodology and workflow process for providing translational research informatics support</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>04</month>
          <volume>42</volume>
          <issue>2</issue>
          <fpage>377</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(08)00122-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2008.08.010</pub-id>
          <pub-id pub-id-type="medline">18929686</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(08)00122-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC2700030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benoit</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Watanabe</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nulty</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Obeng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matsuo</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>quanteda: an R package for the quantitative analysis of textual data</article-title>
          <source>J Open Source Softw</source>
          <year>2018</year>
          <month>10</month>
          <volume>3</volume>
          <issue>30</issue>
          <fpage>774</fpage>
          <pub-id pub-id-type="doi">10.21105/joss.00774</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Culpeper</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Keyness: words, parts-of-speech and semantic categories in the character-talk of Shakespeare’s Romeo and Juliet</article-title>
          <source>Int J Corpus Linguist</source>
          <year>2009</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>59</lpage>
          <pub-id pub-id-type="doi">10.1075/ijcl.14.1.03cul</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Schütze</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>Introduction to Information Retrieval</source>
          <year>2008</year>
          <publisher-loc>Cambridge, USA</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nigam</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A comparison of event models for Naive Bayes text classification</article-title>
          <source>AAAI-98 Workshop 'Learning for Text Categorization'</source>
          <year>1998</year>
          <access-date>2020-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cs.cmu.edu/~knigam/papers/multinomial-aaaiws98.pdf">https://www.cs.cmu.edu/~knigam/papers/multinomial-aaaiws98.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <source>Classification and Regression Trees</source>
          <year>1984</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Chapman &#38; Hall</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Watkins</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Multi-class support vector machines</article-title>
          <source>Support Vector Machines Applications</source>
          <year>1998</year>
          <publisher-loc>Switzerland</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joachims</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Nédellec</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rouveirol</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <source>Text categorization with Support Vector Machines: Learning With Many Relevant Features</source>
          <year>1998</year>
          <publisher-loc>Berlin Heidelberg</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>137</fpage>
          <lpage>42</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chollet</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <source>Keras</source>
          <year>2018</year>
          <access-date>2018-11-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://keras.io/">https://keras.io/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <source>TensorFlow</source>
          <year>2018</year>
          <access-date>2018-11-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tensorflow.org/">https://www.tensorflow.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Maaten</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Visualizing data using t-SNE</article-title>
          <source>J Mach Learn Res</source>
          <year>2008</year>
          <volume>9</volume>
          <fpage>2579</fpage>
          <lpage>605</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cs.toronto.edu/~hinton/absps/tsne.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dernoncourt</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Sequential short-text classification with recurrent and convolutional neural networks</article-title>
          <source>arXiv</source>
          <year>2016</year>
          <month>03</month>
          <day>11</day>
          <access-date>2020-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/N16-1062.pdf">https://www.aclweb.org/anthology/N16-1062.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingma</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ba</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Adam: A method for stochastic optimization</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <access-date>2020-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1412.6980.pdf">https://arxiv.org/pdf/1412.6980.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The Caret Package</article-title>
          <source>GitHub</source>
          <access-date>2018-12-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://topepo.github.io/caret/index.html">http://topepo.github.io/caret/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fawcett</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>An introduction to ROC analysis</article-title>
          <source>Pattern Recognit Lett</source>
          <year>2006</year>
          <month>06</month>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>861</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="doi">10.1016/j.patrec.2005.10.010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klonsky</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>May</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Saffer</surname>
              <given-names>BY</given-names>
            </name>
          </person-group>
          <article-title>Suicide, suicide attempts, and suicidal ideation</article-title>
          <source>Annu Rev Clin Psychol</source>
          <year>2016</year>
          <volume>12</volume>
          <fpage>307</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev-clinpsy-021815-093204</pub-id>
          <pub-id pub-id-type="medline">26772209</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vu</surname>
              <given-names>NT</given-names>
            </name>
            <name name-style="western">
              <surname>Adel</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Schütze</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Combining recurrent and convolutional neural networks for relation classification</article-title>
          <source>Association for Computational Linguistics</source>
          <year>2016</year>
          <month>05</month>
          <day>24</day>
          <access-date>2020-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/N16-1065.pdf">https://www.aclweb.org/anthology/N16-1065.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Learning text representation using recurrent convolutional neural network with highway layers</article-title>
          <source>University College London</source>
          <year>2016</year>
          <month>08</month>
          <day>02</day>
          <access-date>2020-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://discovery.ucl.ac.uk/id/eprint/1526824/1/Wang_neuir2016.pdf">https://discovery.ucl.ac.uk/id/eprint/1526824/1/Wang_neuir2016.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Falcone</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nardella</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lamis</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Erbuto</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Girardi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pompili</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Taking care of suicidal patients with new technologies and reaching-out means in the post-discharge period</article-title>
          <source>World J Psychiatry</source>
          <year>2017</year>
          <month>09</month>
          <day>22</day>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>163</fpage>
          <lpage>76</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.wjgnet.com/2220-3206/full/v7/i3/163.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.5498/wjp.v7.i3.163</pub-id>
          <pub-id pub-id-type="medline">29043154</pub-id>
          <pub-id pub-id-type="pmcid">PMC5632601</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sall</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brenner</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Millikan Bell</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Colston</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Assessment and management of patients at risk for suicide: synopsis of the 2019 US Department of Veterans Affairs and US Department of Defense clinical practice guidelines</article-title>
          <source>Ann Intern Med</source>
          <year>2019</year>
          <month>09</month>
          <day>3</day>
          <volume>171</volume>
          <issue>5</issue>
          <fpage>343</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.7326/M19-0687</pub-id>
          <pub-id pub-id-type="medline">31450237</pub-id>
          <pub-id pub-id-type="pii">2748922</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heilbron</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Compton</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Daniel</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Goldston</surname>
              <given-names>DB</given-names>
            </name>
          </person-group>
          <article-title>The problematic label of suicide gesture: alternatives for clinical research and practice</article-title>
          <source>Prof Psychol Res Pr</source>
          <year>2010</year>
          <month>06</month>
          <day>1</day>
          <volume>41</volume>
          <issue>3</issue>
          <fpage>221</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20640243"/>
          </comment>
          <pub-id pub-id-type="doi">10.1037/a0018712</pub-id>
          <pub-id pub-id-type="medline">20640243</pub-id>
          <pub-id pub-id-type="pmcid">PMC2904564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Franklin</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeiro</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Kleiman</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Bentley</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Nock</surname>
              <given-names>MK</given-names>
            </name>
          </person-group>
          <article-title>Meta-analysis of risk factors for nonsuicidal self-injury</article-title>
          <source>Clin Psychol Rev</source>
          <year>2015</year>
          <month>12</month>
          <volume>42</volume>
          <fpage>156</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26416295"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cpr.2015.09.002</pub-id>
          <pub-id pub-id-type="medline">26416295</pub-id>
          <pub-id pub-id-type="pii">S0272-7358(15)00120-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC4772426</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mullenbach</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wiegreffe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Eisenstein</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Explainable prediction of medical codes from clinical text</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <month>04</month>
          <day>16</day>
          <access-date>2020-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1802.05695.pdf">https://arxiv.org/pdf/1802.05695.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
