<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">44322</article-id><article-id pub-id-type="doi">10.2196/44322</article-id><title-group><article-title>Predicting Undesired Treatment Outcomes With Machine Learning in Mental Health Care: Multisite Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Van Mens</surname><given-names>Kasper</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lokkerbol</surname><given-names>Joran</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wijnen</surname><given-names>Ben</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Janssen</surname><given-names>Richard</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>de 
Lange</surname><given-names>Robert</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tiemens</surname><given-names>Bea</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff8">8</xref><xref ref-type="aff" rid="aff9">9</xref></contrib></contrib-group><aff id="aff1"><institution>Behavioural Science Institute, Radboud University</institution>, <addr-line>Nijmegen</addr-line>, <country>Netherlands</country></aff><aff id="aff2"><institution>Data Science, Altrecht Mental Healthcare</institution>, <addr-line>Utrecht</addr-line>, <country>Netherlands</country></aff><aff id="aff3"><institution>Centre of Economic Evaluation &#x0026; Machine Learning, Trimbos Institute (Netherlands Institute of Mental Health)</institution>, <addr-line>Utrecht</addr-line>, <country>Netherlands</country></aff><aff id="aff4"><institution>Department of Clinical Epidemiology and Medical Technology Assessment, Maastricht University Medical Centre</institution>, <addr-line>Maastricht</addr-line>, <country>Netherlands</country></aff><aff id="aff5"><institution>Health Care Governance, Erasmus School of Health Policy and Management, Erasmus University Rotterdam</institution>, <addr-line>Rotterdam</addr-line>, <country>Netherlands</country></aff><aff id="aff6"><institution>Scientific Centre for Care and Welfare, Tilburg University, Tranzo</institution>, <addr-line>Tilburg</addr-line>, <country>Netherlands</country></aff><aff id="aff7"><institution>Alan Turing Institute</institution>, <addr-line>Almere</addr-line>, <country>Netherlands</country></aff><aff id="aff8"><institution>Indigo Service Organization</institution>, <addr-line>Utrecht</addr-line>, <country>Netherlands</country></aff><aff id="aff9"><institution>Pro Persona Research</institution>, <addr-line>Renkum</addr-line>, <country>Netherlands</country></aff><contrib-group><contrib 
contrib-type="editor"><name name-style="western"><surname>Lovis</surname><given-names>Christian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Meyer</surname><given-names>Denny</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Bond</surname><given-names>Raymond</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Kasper Van Mens, MSc<email>ka.van.mens@altrecht.nl</email></corresp></author-notes><pub-date pub-type="collection"><year>2023</year></pub-date><pub-date pub-type="epub"><day>23</day><month>8</month><year>2023</year></pub-date><volume>11</volume><elocation-id>e44322</elocation-id><history><date date-type="received"><day>15</day><month>11</month><year>2022</year></date><date date-type="rev-recd"><day>03</day><month>02</month><year>2023</year></date><date date-type="accepted"><day>24</day><month>03</month><year>2023</year></date></history><copyright-statement>&#x00A9; Kasper Van Mens, Joran Lokkerbol, Ben Wijnen, Richard Janssen, Robert de Lange, Bea Tiemens. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 23.8.2023. </copyright-statement><copyright-year>2023</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2023/1/e44322"/><abstract><sec><title>Background</title><p>Predicting which treatment will work for which patient in mental health care remains a challenge.</p></sec><sec><title>Objective</title><p>The aim of this multisite study was 2-fold: (1) to predict patients&#x2019; response to treatment in Dutch basic mental health care using commonly available data from routine care and (2) to compare the performance of these machine learning models across three different mental health care organizations in the Netherlands by using clinically interpretable models.</p></sec><sec sec-type="methods"><title>Methods</title><p>Using anonymized data sets from three different mental health care organizations in the Netherlands (n=6452), we applied a least absolute shrinkage and selection operator regression 3 times to predict the treatment outcome. The algorithms were internally validated with cross-validation within each site and externally validated on the data from the other sites.</p></sec><sec sec-type="results"><title>Results</title><p>The performance of the algorithms, measured by the area under the curve of the internal validations as well as the corresponding external validations, ranged from 0.77 to 0.80.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Machine learning models provide a robust and generalizable approach in automated risk signaling technology to identify cases at risk of poor treatment outcomes. 
The results of this study hold substantial implications for clinical practice by demonstrating that the performance of a model derived from one site is similar when applied to another site (ie, good external validation).</p></sec></abstract><kwd-group><kwd>treatment outcomes</kwd><kwd>mental health</kwd><kwd>machine learning</kwd><kwd>treatment</kwd><kwd>model</kwd><kwd>Netherlands</kwd><kwd>data</kwd><kwd>risk</kwd><kwd>risk signaling</kwd><kwd>technology</kwd><kwd>clinical practice</kwd><kwd>model performance</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Optimizing Health Care Systems</title><p>One of the main challenges in designing an efficient health care system is to prevent offering too many resources to some patients and too few to others. In other words, the challenge is to maximize the opportunity for appropriate care at an individual level [<xref ref-type="bibr" rid="ref1">1</xref>]. The recent drive for precision or personalized medicine aims to improve health care systems by tailoring treatments to patients more effectively. Patients are grouped in terms of their expected treatment response using diagnostic tests or techniques [<xref ref-type="bibr" rid="ref2">2</xref>]. However, precision medicine remains a challenge in mental health care because treatments are effective <italic>on average,</italic> but it is difficult to predict exactly whom they will work for [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Stepped care principles provide a framework to allocate limited health care resources and have been proven to be cost-effective for depression and anxiety [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. In stepped care, treatments start with low intensity unless there is a reason to intensify. 
Such reasons are identified during treatment when there is a lack of confidence in a positive outcome given the current treatment trajectory. To this end, routine outcome monitoring (ROM) could be used to observe patterns of early treatment response and identify which patients will probably not benefit from their current treatment [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p></sec><sec id="s1-2"><title>Identification of Nonresponders</title><p>The system can be improved by earlier and more accurate identification of those nonresponders so that patients do not have to endure periods of care in which they do not improve and could potentially lose interest and drop out. On top of that, scarce health care resources are not wasted by engaging in treatment without the desired effect. However, misclassification comes with a cost. Incorrectly classifying patients as needing more intensified treatment results in the unnecessary use of health care resources on patients who would have benefited from a shorter low-intensity treatment. In many Dutch clinics providing basic mental health care, ROM measurements are part of routine care. This raises the question of whether these ROM data could be used to provide accurate prognostic feedback and support a clinician in maximizing the opportunity for appropriate care on the individual level.</p></sec><sec id="s1-3"><title>Predicting Outcomes With Machine Learning During Treatment</title><p>Techniques from the field of machine learning are aimed at making accurate predictions based on patterns in data. 
Machine learning can help to identify robust, reproducible, and generalizable predictors of treatment response [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>], and has already been used in health care research, for example, in predicting health care costs and outcomes [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. By discovering associations and understanding patterns and trends within the data, machine learning has the potential to improve care. Machine learning permits a finer detection of which patients are at an elevated risk of experiencing persistent poor and costly health outcomes, and may thus give impetus to a more efficient, personalized, and proactive type of mental health care. Inspired by this knowledge, the study aims to use machine learning on ROM data as a feedback device to signal which patients have an elevated risk of a poor response to treatment [<xref ref-type="bibr" rid="ref16">16</xref>]. However, the use of complex data, and the associated increasingly complex models, challenges researchers to ensure that these models are clinically interpretable rather than a &#x201C;black box&#x201D; [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>].</p></sec><sec id="s1-4"><title>Independent Validation</title><p>After developing a prediction model, it is recommended to evaluate model performance in other clinical data that was not used to develop the model, as mentioned in the TRIPOD (Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis) statement. For example, such a validation would require researchers to have access to a similar data set (ie, in terms of predictor variables and outcomes) stemming from a similar population/clinic and compare model performance on this external independent data set (ie, cross-site design). 
The lack of independent validation is a major limitation of the extant machine learning literature in health care [<xref ref-type="bibr" rid="ref19">19</xref>]. In a recent review on machine learning for suicide prediction, the majority of studies reviewed split the data into training and testing sets, whereas none of the studies used a cross-site design in which a model was trained using data from one site and evaluated using data from another [<xref ref-type="bibr" rid="ref20">20</xref>]. Another recent review looking at applications of machine learning algorithms to predict therapeutic outcomes in depression concluded that most studies did not assess out-of-sample estimates of model fit, which limited their generalizability and likely overestimated predictive accuracy [<xref ref-type="bibr" rid="ref15">15</xref>]. Therefore, the aim of this study was 2-fold: (1) to predict patients&#x2019; response to treatment in Dutch basic mental health care using limited commonly available data from routine care and (2) to compare the performance of these machine learning models across three different mental health care organizations in the Netherlands by using clinically interpretable models. By using commonly available data from routine care, the technical implementation of the model in clinical practice would be straightforward.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design and Data Collection</title><p>Data on mental health treatment and outcomes were collected according to a data collection protocol. Mental health care sites from 6 regions in the Netherlands were involved. Patients were treated for mild to severe mental health problems with a low risk of suicide or dangerous behavior. The data set consisted of patient records with a completed treatment from 2014 to 2018. A completed treatment in this setting consists of around 5-12 sessions [<xref ref-type="bibr" rid="ref21">21</xref>]. 
The protocol consisted of a predefined set of variables with clear definitions and coding for each variable.</p><p>For treatment records to be included in this study, the availability of at least the ROM data as well as certain other variables that could be used for predictions was required. As ROM questionnaires are not mandatory in routine care, ROM data were not available for all patients at all measurements. Records were included when ROM data were available at the start of, during, and at the end of treatment. Of the 6 participating regions, 3 had sufficient treatment records (&#x003E;1000) with nonmissing values and were included in the study (region 1: n=3020; region 2: n=1484; region 3: n=1948). In each region, patients were treated in multiple settings in both urban and rural areas. A set of 26,912 records had to be excluded from the three sites because there was a missing ROM measurement at either the start or end, such that the outcome could not be determined, or there was no measurement during treatment, such that early treatment response patterns could not be determined. 
To assess the comparability of the included and excluded treatment records in our analysis, a comparison was made regarding age, sex, diagnosis, and baseline severity between both groups (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of patient characteristics between the included and excluded treatment records.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2"/><td align="left" valign="bottom">Included (n=6452)</td><td align="left" valign="bottom">Excluded (n=26,912)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4"><bold>Sex, n (%)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Female</td><td align="left" valign="top">4077 (63.2)</td><td align="left" valign="top">16,872 (62.7)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Male</td><td align="char" char="." valign="top">2375 (36.8)</td><td align="char" char="." valign="top">10,040 (37.3)</td></tr><tr><td align="left" valign="top" colspan="4"><bold>Age category (years), n (%)</bold></td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">&#x003C;30</td><td align="left" valign="top">1978 (30.7)</td><td align="left" valign="top">8671 (32.2)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">30-40</td><td align="left" valign="top">1541 (23.9)</td><td align="left" valign="top">6701 (24.9)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">40-50</td><td align="left" valign="top">1238 (19.2)</td><td align="left" valign="top">5298 (19.7)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">50-60</td><td align="left" valign="top">1154 (17.9)</td><td align="left" valign="top">4119 (15.3)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." 
valign="top">&#x2265;60</td><td align="left" valign="top">541 (8.4)</td><td align="left" valign="top">2123 (7.9)</td></tr><tr><td align="left" valign="top" colspan="4"><bold>Diagnosis group, n (%)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Anxiety</td><td align="left" valign="top">2588 (40.1)</td><td align="left" valign="top">9955 (37)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Depression</td><td align="left" valign="top">2585 (40.1)</td><td align="left" valign="top">10,831 (40.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Other</td><td align="left" valign="top">1279 (19.8)</td><td align="left" valign="top">6126 (22.8)</td></tr><tr><td align="left" valign="top" colspan="2">Total OQ-45.2<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> score baseline, mean (SD)</td><td align="left" valign="top">80.36 (21.18)</td><td align="left" valign="top">80.60 (23.23)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>OQ-45.2: Outcome Questionnaire.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-2"><title>Data Description</title><p>This study used treatment records, as opposed to patient records. A treatment record was started whenever a patient began treatment within one of the participating centers. As a result, some patients could have multiple treatment records (355/6452, 5.5% of the records were not unique). ROM assessed the development in symptom severity and functioning using the standardized Dutch version of the Outcome Questionnaire (OQ-45.2) [<xref ref-type="bibr" rid="ref22">22</xref>]. The OQ-45.2 contains three subscales: Symptom Distress, Interpersonal Relations, and Social Role. 
The psychometric properties of the Dutch OQ-45.2 are adequate [<xref ref-type="bibr" rid="ref23">23</xref>].</p><p>The idea of this study was to support a stepped care framework by predicting, during treatment, undesired outcomes at the end of treatment. These predictions can trigger a reconsideration of the chosen treatment plan to improve the probability of a desired outcome after finishing the treatment. Desired treatment outcomes are highly personal and dependent on the type of treatment and setting. For this study, we chose to define undesired outcomes as nonimprovement. Based on the principles of reliable change [<xref ref-type="bibr" rid="ref24">24</xref>], we defined nonimprovement as improving less than a medium effect size on the Symptom Distress subscale of the OQ-45.2 [<xref ref-type="bibr" rid="ref25">25</xref>]. Our study used data from the so-called <italic>basic mental health care</italic> in the Netherlands. Basic mental health care is cost-effective short-term mental health care with an average Cohen <italic>d</italic> effect size of 0.9 [<xref ref-type="bibr" rid="ref21">21</xref>]. Despite this high effect size, the aim of this short-term treatment of 5-12 sessions is primarily to increase self-direction and get patients back on track without care as soon as possible. In this study, individual treatment goals were unknown, and therefore, it was decided to define nonimprovement as less than a medium effect size. This is a little more than half of the average improvement in this mental health care setting. Our clinical outcome was derived from the observed change in the Symptom Distress scale on the OQ-45.2. Patients with less than half of an SD improvement in symptom severity at the end of treatment were classified as having an &#x201C;undesired clinical outcome&#x201D; (called <italic>nonimprovement</italic> henceforth). 
With the SD of the Symptom Distress subscale in a Dutch clinical population being 16 [<xref ref-type="bibr" rid="ref23">23</xref>], nonimprovement was defined as a patient not improving at least 8 points on the Symptom Distress subscale of the OQ-45.2.</p><p>An early change was defined as the difference between the ROM at baseline and the first ROM during treatment. For both the summed scale scores on the OQ-45.2 as well as the individual items, early change variables were created. Besides the ROM data, a set of clinical and demographic variables, such as main diagnosis, age, and living condition, was included for prediction. The total set consisted of 163 variables, of which 144 were related to the scores on the OQ-45.2 and 19 to the context of the patient.</p></sec><sec id="s2-3"><title>Modeling and Validation Strategy</title><p>The data set was split across all included locations so that models could be trained on a single location and externally validated on each of the other locations. Nonimprovement was predicted for each location separately based on all available predictors using least absolute shrinkage and selection operator (LASSO) models. LASSO was used both to guarantee interpretability for intended model users and to facilitate explicit comparison between prediction models built in different locations. Moreover, as several measures were derived from the same questionnaire, this could have led to multicollinearity between predictors in the data set. LASSO is a technique that has been argued to be able to deal with multicollinearity and still provide stable and interpretable estimators [<xref ref-type="bibr" rid="ref26">26</xref>]. All numeric variables were centered and scaled.</p><p>Using 10-fold cross-validation with 10 repeats, the optimal hyperparameter was determined by considering 100 possible penalty values (ie, &#x03BB;) between 0.001 and 1000. 
For the LASSO with the optimized penalty, the probability threshold was tuned by optimizing <italic>F</italic><sub>1</sub>-scores over 36 possible probability values between 0.3 and 0.65. The final LASSO model selected for each site was then applied to each of the other sites for model assessment, reporting sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), and area under the curve (AUC) using the optimized probability threshold.</p><p>Bootstrapping was used to estimate model performance in the site in which the model was built to have an internally validated measure of model performance to compare with the two externally validated measures of model performance by estimating CIs for all performance scores (ie, sensitivity, specificity, PPV, and NPV). The bootstraps were performed by sampling each data set 1000 times with replacement, resulting in 1000 simulated data sets for each site. The final LASSO model of each of the 3 site-specific models was then applied to the bootstrapped data set, resulting in 1000 confusion matrixes per site. Next, the 2.5 percentile and 97.5 percentile for each performance indicator (ie, sensitivity, specificity, PPV, and NPV) were used to determine the 95% CI for each estimate.</p><p>All analyses were performed in R (version 4.0.0; R Foundation for Statistical Computing) [<xref ref-type="bibr" rid="ref27">27</xref>]. The package <italic>caret</italic> was used to build the models [<xref ref-type="bibr" rid="ref28">28</xref>]. The package <italic>glmnet</italic> was used to perform the LASSO regression [<xref ref-type="bibr" rid="ref29">29</xref>]. 
The package <italic>pROC</italic> was used to analyze the AUCs [<xref ref-type="bibr" rid="ref30">30</xref>].</p></sec><sec id="s2-4"><title>Ethical Considerations</title><p>Since the database was anonymized with statistical disclosure control techniques [<xref ref-type="bibr" rid="ref31">31</xref>], there was no need for informed consent or approval by a medical ethics committee (Dutch Civil Law, Article 7:458).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>The total data set used in the analyses contained information on 6452 treatment records and included anonymized demographic variables, care-related variables, and information about the severity and types of complaints. The characteristics of the patient populations within each site are shown in <xref ref-type="table" rid="table2">Table 2</xref>. There are notable differences between baseline symptom severity, the distribution of the main diagnosis, and the percentage of patients with a paid job between sites.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Overview of research population (n=6452).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="3"/><td align="left" valign="bottom">Region 1 (n=3020)</td><td align="left" valign="bottom">Region 2 (n=1484)</td><td align="left" valign="bottom">Region 3 (n=1948)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="7"><bold>Care-related variables</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Nonimprovement, n (%)</td><td align="left" valign="top">1028 (34.04)</td><td align="left" valign="top">499 (33.63)</td><td align="left" valign="top">577 (29.62)</td><td align="left" valign="top">.003</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Treatment duration (days), mean (SD)</td><td align="left" 
valign="top">145.19 (64.87)</td><td align="left" valign="top">208.00 (78.35)</td><td align="left" valign="top">205.78 (77.52)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Treatment sessions (n), mean (SD)</td><td align="left" valign="top">9.73 (2.92)</td><td align="left" valign="top">13.15 (4.03)</td><td align="left" valign="top">11.21 (4.34)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="7"><bold>Type and severity of complaints</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Baseline symptom severity score, mean (SD)</td><td align="left" valign="top">51.42 (13.94)</td><td align="left" valign="top">52.16 (13.45)</td><td align="left" valign="top">48.72 (13.65)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Baseline social role score, mean (SD)</td><td align="left" valign="top">13.76 (5.06)</td><td align="left" valign="top">14.37 (4.97)</td><td align="left" valign="top">13.79 (5.06)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Baseline interpersonal relations score, mean (SD)</td><td align="left" valign="top">15.29 (6.08)</td><td align="left" valign="top">17.01 (6.50)</td><td align="left" valign="top">15.28 (6.11)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Baseline total OQ-45<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> score, mean (SD)</td><td align="left" valign="top">80.47 (21.25)</td><td align="left" valign="top">83.54 (20.76)</td><td align="left" valign="top">77.79 (21.07)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" 
colspan="5"><bold>Diagnosis group, n (%)</bold></td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Anxiety</td><td align="left" valign="top">1300 (43)</td><td align="left" valign="top">562 (37.9)</td><td align="left" valign="top">726 (37.3)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Depression</td><td align="left" valign="top">1142 (37.8)</td><td align="left" valign="top">568 (38.3)</td><td align="left" valign="top">875 (44.9)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Other</td><td align="left" valign="top">578 (19.1)</td><td align="left" valign="top">354 (23.9)</td><td align="left" valign="top">347 (17.8)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="7"><bold>Demographic variables, n (%)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="5"><bold>Sex</bold></td><td align="char" char="." valign="top">.14</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Female</td><td align="left" valign="top">1878 (62.2)</td><td align="left" valign="top">934 (62.9)</td><td align="left" valign="top">1265 (64.9)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Male</td><td align="char" char="." valign="top">1142 (37.8)</td><td align="char" char="." valign="top">550 (37.1)</td><td align="char" char="." 
valign="top">683 (35.1)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="5"><bold>Age category (years)</bold></td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="." valign="top">&#x003C;30</td><td align="left" valign="top">954 (31.6)</td><td align="left" valign="top">505 (34)</td><td align="left" valign="top">519 (26.6)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="." valign="top">30-40</td><td align="left" valign="top">694 (23)</td><td align="left" valign="top">369 (24.9)</td><td align="left" valign="top">478 (24.5)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="." valign="top">40-50</td><td align="left" valign="top">577 (19.1)</td><td align="left" valign="top">249 (16.8)</td><td align="left" valign="top">412 (21.1)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="." valign="top">50-60</td><td align="left" valign="top">556 (18.4)</td><td align="left" valign="top">241 (16.2)</td><td align="left" valign="top">357 (18.3)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="." 
valign="top">&#x2265;60</td><td align="left" valign="top">239 (7.9)</td><td align="left" valign="top">120 (8.1)</td><td align="left" valign="top">182 (9.3)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="5"><bold>Origin</bold></td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Native</td><td align="left" valign="top">2838 (94)</td><td align="left" valign="top">1 (0.1)</td><td align="left" valign="top">343 (17.6)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Immigrant</td><td align="left" valign="top">68 (2.3)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">97 (5)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Unknown</td><td align="left" valign="top">114 (3.8)</td><td align="left" valign="top">1483 (99.9)</td><td align="left" valign="top">1508 (77.4)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="5"><bold>Marital status</bold></td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Not married</td><td align="left" valign="top">1612 (53.4)</td><td align="left" valign="top">50 (3.4)</td><td align="left" valign="top">969 (49.7)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Married</td><td align="left" valign="top">1052 (34.8)</td><td align="left" valign="top">24 (1.6)</td><td align="left" valign="top">747 (38.3)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" 
valign="top">Divorced/widowed</td><td align="left" valign="top">356 (11.8)</td><td align="left" valign="top">8 (0.5)</td><td align="left" valign="top">224 (11.5)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Unknown</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">1402 (94.5)</td><td align="left" valign="top">8 (0.4)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="5"><bold>Living situation</bold></td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Alone</td><td align="left" valign="top">981 (32.5)</td><td align="left" valign="top">35 (2.4)</td><td align="left" valign="top">571 (29.3)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">With partner</td><td align="left" valign="top">1638 (54.2)</td><td align="left" valign="top">43 (2.9)</td><td align="left" valign="top">1100 (56.5)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Child</td><td align="left" valign="top">248 (8.2)</td><td align="left" valign="top">9 (0.6)</td><td align="left" valign="top">186 (9.5)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Other</td><td align="left" valign="top">151 (5)</td><td align="left" valign="top">6 (0.4)</td><td align="left" valign="top">83 (4.3)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Unknown</td><td align="left" valign="top">2 (0.1)</td><td align="left" valign="top">1391 (93.8)</td><td align="left" valign="top">8 (0.4)</td><td align="left" 
valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="5"><bold>Paid job</bold></td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Employed</td><td align="left" valign="top">1071 (35.5)</td><td align="left" valign="top">392 (26.4)</td><td align="left" valign="top">536 (27.5)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Not employed</td><td align="left" valign="top">1949 (64.5)</td><td align="left" valign="top">831 (56)</td><td align="left" valign="top">1412 (72.5)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">Unknown</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">261 (17.6)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>OQ-45.2: Outcome Questionnaire.</p></fn></table-wrap-foot></table-wrap><p>The nonzero LASSO coefficients are shown in <xref ref-type="table" rid="table3">Table 3</xref>. The most important coefficients, in terms of relative coefficient size, were related to early changes in the Symptom Distress subscale of the OQ-45.2, and the change in the total score of the OQ-45.2. The self-blame measurement at the start of treatment was the only other nonzero coefficient at each of the 3 regions. The coefficient for paid employment stands out in the region 1 model, and age had a notable coefficient in regions 1 and 3. Furthermore, the models contained smaller nonzero coefficients that varied between each site (eg, some OQ-45.2 variables were nonzero in some of the models but not in all of the models). The results of the hyperparameter tuning are shown in <xref ref-type="table" rid="table4">Table 4</xref>. 
As shown, the threshold to define a positive class was set between 0.30 (region 2) and 0.34 (region 1), with &#x03BB; varying from 0.02 (region 3) to 0.16 (region 1).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Nonzero least absolute shrinkage and selection operator coefficients of the three models.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2"/><td align="left" valign="bottom">Region 1</td><td align="left" valign="bottom">Region 2</td><td align="left" valign="bottom">Region 3</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Intercept</td><td align="left" valign="top">&#x2013;0.59</td><td align="left" valign="top">&#x2013;0.76</td><td align="left" valign="top">&#x2013;1.14</td></tr><tr><td align="left" valign="top" colspan="2">Age</td><td align="left" valign="top">0.05</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">0.04</td></tr><tr><td align="left" valign="top" colspan="2">Number of days between referral and first appointment (waiting queue)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.05</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="2">Employment (paid job)</td><td align="left" valign="top">&#x2013;0.39</td><td align="left" valign="top">&#x2013;0.02</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="2">Nuisance on job (yes, very much)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2013;0.12</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="2">Work absence (unknown)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.11</td></tr><tr><td align="left" valign="top" 
colspan="5"><bold>OQ-45.2<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> start measurement</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Self-blame</td><td align="left" valign="top">&#x2013;0.08</td><td align="left" valign="top">&#x2013;0.01</td><td align="left" valign="top">&#x2013;0.07</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Feeling weak</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2013;0.01</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Happiness</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.05</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Disturbing thoughts</td><td align="left" valign="top">&#x2013;0.11</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Stomach</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2013;0.05</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Relationships</td><td align="left" valign="top">&#x2013;0.01</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Sadness</td><td align="left" valign="top">&#x2013;0.03</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="5"><bold>OQ-45.2 middle measurement</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Suicidal thoughts</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.03</td></tr><tr><td align="left" 
valign="top"/><td align="left" valign="top">Enjoyment</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2013;0.01</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Relationships</td><td align="left" valign="top">&#x2013;0.07</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2013;0.01</td></tr><tr><td align="left" valign="top" colspan="5"><bold>OQ-45.2 early change</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Stamina</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.01</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Satisfaction in work or school</td><td align="left" valign="top">&#x2013;0.01</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2013;0.05</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Disturbing thoughts</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.03</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Stomach</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Heart</td><td align="left" valign="top">0.01</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Sleeping</td><td align="left" valign="top">0.03</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Sadness</td><td align="left" valign="top">0.03</td><td align="left" valign="top">&#x2014;</td><td align="left" 
valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Relationships</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2013;0.02</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Headaches</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.03</td></tr><tr><td align="left" valign="top" colspan="2">SD OQ-45.2 score (change)</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.81</td><td align="left" valign="top">1.09</td></tr><tr><td align="left" valign="top" colspan="2">Total OQ-45.2 score (change)</td><td align="left" valign="top">0.07</td><td align="left" valign="top">0.15</td><td align="left" valign="top">&#x2014;</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Not applicable.</p></fn><fn id="table3fn2"><p><sup>b</sup>OQ-45.2: Outcome Questionnaire.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>The parameter settings of the three models.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Lambda</td><td align="left" valign="bottom">Probability</td></tr></thead><tbody><tr><td align="left" valign="top">Model region 1</td><td align="char" char="." valign="top">0.16</td><td align="left" valign="top">0.34</td></tr><tr><td align="left" valign="top">Model region 2</td><td align="char" char="." valign="top">0.03</td><td align="left" valign="top">0.3</td></tr><tr><td align="left" valign="top">Model region 3</td><td align="char" char="." valign="top">0.02</td><td align="left" valign="top">0.32</td></tr></tbody></table></table-wrap><p>The performance of the three models is shown in <xref ref-type="table" rid="table5">Table 5</xref>. 
Each model (row) has been evaluated internally and two times externally. Each site (column) has been used three times: one time for internal validation and two times for the external validation of the other models. The diagonal contains the three internal validations. The CIs of the AUCs overlap, which indicates that there were no significant differences in the overall performances of the models. The AUCs of the three models in the three internal validations were 0.77 (region 2) and 0.80 (regions 1 and 3). The AUCs of the six external validations ranged from 0.77 to 0.80. An overview of the associated confusion matrixes is attached in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Comparison of internally (diagonal) and externally validated results within each site with 1000 bootstrapped CIs for regions 1, 2, and 3.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Metrics</td><td align="left" valign="bottom">Region 1 validation</td><td align="left" valign="bottom">Region 2 validation</td><td align="left" valign="bottom">Region 3 validation</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="5"><bold>Region 1 model</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Sensitivity (95% CI)</td><td align="left" valign="top">0.784 (0.760-0.809)</td><td align="char" char="." valign="top">0.762 (0.725-0.800)</td><td align="char" char="." valign="top">0.780 (0.747-0.813)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Specificity (95% CI)</td><td align="left" valign="top">0.698 (0.676-0.719)</td><td align="char" char="." valign="top">0.647 (0.617-0.676)</td><td align="char" char="." 
valign="top">0.673 (0.650-0.697)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Positive predictive value (95% CI)</td><td align="left" valign="top">0.572 (0.545-0.600)</td><td align="char" char="." valign="top">0.522 (0.486-0.560)</td><td align="char" char="." valign="top">0.501 (0.471-0.534)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Negative predictive value (95% CI)</td><td align="left" valign="top">0.862 (0.846-0.880)</td><td align="char" char="." valign="top">0.843 (0.818-0.868)</td><td align="char" char="." valign="top">0.879 (0.859-0.898)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">AUC<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup> (95% CI)</td><td align="left" valign="top">0.799 (0.783-0.816)</td><td align="char" char="." valign="top">0.771 (0.746-0.794)</td><td align="char" char="." valign="top">0.799 (0.778-0.819)</td></tr><tr><td align="left" valign="top" colspan="5"><bold>Region 2 model</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Sensitivity (95% CI)</td><td align="left" valign="top">0.841 (0.818-0.863)</td><td align="char" char="." valign="top">0.824 (0.789-0.856)</td><td align="char" char="." valign="top">0.868 (0.844-0.896)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Specificity (95% CI)</td><td align="left" valign="top">0.584 (0.563-0.606)</td><td align="char" char="." valign="top">0.586 (0.554-0.615)</td><td align="char" char="." valign="top">0.548 (0.520-0.574)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Positive predictive value (95% CI)</td><td align="left" valign="top">0.511 (0.486-0.534)</td><td align="char" char="." valign="top">0.502 (0.466-0.533)</td><td align="char" char="." 
valign="top">0.447 (0.419-0.477)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Negative predictive value (95% CI)</td><td align="left" valign="top">0.877 (0.860-0.893)</td><td align="char" char="." valign="top">0.868 (0.841-0.892)</td><td align="char" char="." valign="top">0.908 (0.890-0.927)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">AUC (95% CI)</td><td align="left" valign="top">0.782 (0.765-0.799)</td><td align="char" char="." valign="top">0.774 (0.749-0.798)</td><td align="char" char="." valign="top">0.792 (0.772-0.813)</td></tr><tr><td align="left" valign="top" colspan="5"><bold>Region 3 model</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Sensitivity (95% CI)</td><td align="left" valign="top">0.696 (0.667-0.726)</td><td align="char" char="." valign="top">0.673 (0.633-0.716)</td><td align="char" char="." valign="top">0.742 (0.705-0.779)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Specificity (95% CI)</td><td align="left" valign="top">0.749 (0.730-0.768)</td><td align="char" char="." valign="top">0.726 (0.699-0.754)</td><td align="char" char="." valign="top">0.732 (0.708-0.754)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Positive predictive value (95% CI)</td><td align="left" valign="top">0.589 (0.561-0.617)</td><td align="char" char="." valign="top">0.554 (0.517-0.596)</td><td align="char" char="." valign="top">0.538 (0.503-0.573)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Negative predictive value (95% CI)</td><td align="left" valign="top">0.827 (0.809-0.846)</td><td align="char" char="." valign="top">0.814 (0.789-0.841)</td><td align="char" char="." valign="top">0.871 (0.850-0.890)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">AUC (95% CI)</td><td align="left" valign="top">0.787 (0.771-0.803)</td><td align="char" char="." 
valign="top">0.768 (0.744-0.792)</td><td align="char" char="." valign="top">0.802 (0.782-0.822)</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Evaluation of Three Models at 3 Sites</title><p>The aim of this study was to use machine learning to predict which patients would not substantially benefit from treatment across 3 different mental health care organizations in the Netherlands by using clinically interpretable models. This study used a cross-site design in which the performance of a model developed in one site was compared to the model performance on an external independent data set (ie, 3 &#x00D7; 3 cross-site design, as per the TRIPOD statement). Data from ROM, among other clinical and demographic data, were used for the predictions.</p><p>Both the AUC of the internal validations of the three models and the corresponding external validations were in the range of 0.77 to 0.80, indicating fair to good model performance [<xref ref-type="bibr" rid="ref32">32</xref>]. In addition, the CIs of the AUCs overlapped in each of the 9 evaluations, indicating that the performance estimates were robust and likely to be generalizable to different settings. This could be explained by the fact that LASSO regression is known to be less prone to overfitting compared to other machine learning algorithms, and when evaluated with 1000 times bootstrapping, the internal validations give a good indication of overall performance.</p><p>All three models generalized well to the other sites. This is an interesting finding and a promising result for the scalability of the implementation of machine learning models. Decentralized data can be gathered, within the boundaries of the General Data Protection Regulation. 
A model can be developed within the context of one site and then be exported to other sites, even if those other sites differ in certain characteristics. For example, in this research, the 3 sites differed in geographical location from more rural to urban. The patient populations differed, with some significant differences in the distribution of important variables such as main diagnosis, baseline symptom severity, and percentage of patients with paid employment. The data sources differed in the type of electronic health record system used in clinical practice. Despite these substantial differences, we were able to develop three robust machine learning models with acceptable AUCs that could be applied in all 3 settings.</p><p>The sensitivity and specificity of the three models were consistent in each of their external validations. There were differences in these metrics between models, mainly caused by a trade-off between sensitivity and specificity when evaluating model performance with metrics from the confusion matrix. The models of regions 1 and 2 were more shifted toward a higher sensitivity and the model of region 3 toward a higher specificity. However, these differences were a shift in the balance rather than an <italic>absolute difference</italic> between the models, as was indicated by the comparable AUCs.</p><p>To give some insight into the practical utility of the model, the results can be translated to a hypothetical clinical scenario. Imagine a health care professional with a caseload of 30 patients working in region 2, with a model created in region 1. About 10 of the 30 patients will not improve according to our data (34%). The model is used by the clinician to support the identification of potential nonimproving patients during treatment. With a sensitivity of 0.76 and a specificity of 0.65 (the results of model 1 applied to region 2), 15 patients will be classified as nonimprovers and 15 will be classified as improvers. 
Among the improvers, 13 of them will actually improve (ie, NPV=0.84), and among the nonimprovers, 8 of them would actually not improve (ie, PPV=0.52). For half of the patients who are classified as nonimprovers, therefore, the discussion would not be necessary at that time. So the question is whether these models are already good enough to actually use in practice. The idea is that when the model indicates that a patient is on track, there is little reason to change treatment. When the model indicates an elevated risk of nonimprovement, the clinician and patient should discuss the situation and adapt treatment plans if necessary. It is therefore important to see such machine learning models not as black-and-white decision tools but as complementary tools in the identification and stratification of patients in need of more or less care.</p></sec><sec id="s4-2"><title>Predictive Variables</title><p>Although this research was aimed at making predictions, rather than explaining relations, we used LASSO regression to inform clinicians about how the algorithm works. In the health care setting, this is important as health care professionals often want to understand which parameters affect a prediction and how they contribute to it [<xref ref-type="bibr" rid="ref33">33</xref>]. By looking at the coefficients of each LASSO model, it can be concluded that the algorithms rely on the following variables: early change in the Symptom Distress subscale and the total scores of the OQ-45.2, as well as having a paid job at the start of the treatment and age. In a paper by McMahon [<xref ref-type="bibr" rid="ref34">34</xref>], several other studies are mentioned in which early symptom improvement, or lack of it, has been associated with psychiatric treatment outcomes. In a study by Lorenzo-Lucas et al [<xref ref-type="bibr" rid="ref35">35</xref>], being unemployed, among other factors, predicted a lower likelihood of recovery. 
There were certain individual OQ-45.2 questionnaire items that were associated with nonzero LASSO coefficients. However, these items differed between the sites, and the sizes of the coefficients were relatively small. We are, therefore, reluctant to generalize findings on these individual OQ-45.2 items, with small nonzero coefficients, to future prediction research.</p><p>The high relative importance of the early change variable (ie, in terms of the absolute values of the coefficients) is likely to contribute to good external model validation, as it is a straightforwardly defined predictor that is less likely to be subject to sampling variation. Furthermore, given the high importance of early change in the model, one could even advocate for an alternative simpler predictive model (ie, a &#x201C;rule of thumb&#x201D;) using early change only (or combined with weaker predictors, eg, age and employment status).</p></sec><sec id="s4-3"><title>Strengths and Limitations</title><p>The main strength of this study is that we used a 3 &#x00D7; 3 cross-site design to develop and evaluate the algorithms, resulting in three models with an independent validation of their performance. In addition, LASSO regression was used, which is a parametric approach, resulting in a prediction model that is still relatively easy to interpret. Moreover, LASSO is less prone to overfitting, which increased the generalizability of the results. Furthermore, with the use of a data protocol with clear data definition descriptions, we could use readily available data from routine care in the Netherlands, meaning that our approach could easily be adopted in other Dutch basic mental health care organizations using ROM (the R scripts to build and validate the models are available on request). This study has a number of limitations that need to be acknowledged. First, we limited our analysis to treatment records with complete data only. 
In addition, we could not use every variable described in the data protocol because of missing values on these variables in one of the sites. Moreover, we had to exclude a large set of records because of missing data on the OQ-45.2. However, the excluded group of patients did not substantially differ in sex, age, diagnosis, or baseline symptom severity. Nonetheless, we would like to emphasize that our models cannot be directly applied to other patient populations. Second, our data did not contain information on whether the outcome of the ROM had already been used to alter the treatment strategy. This would underestimate the impact of early change, as patients with only minor or no clinical improvements would have been given a possibly more intensive treatment for them to respond to the treatment. Third, although it is difficult to estimate the required sample size for developing a prognostic model, our data had a relatively small sample size [<xref ref-type="bibr" rid="ref36">36</xref>]. Fourth, this study chose to define an undesired outcome as improving with less than a medium effect size. However, the definition of an undesired outcome is subjective and will differ between different types of treatment settings. Therefore, our definition cannot directly be generalized to other settings, and each study should make an effort to define a relevant undesired outcome for that domain with experts from clinical practice.</p><p>This study was performed within the context of a stepped care framework, in which treatment optimization is required during treatment. Our models heavily rely on predictors derived from early change patterns and can, therefore, not be applied at the start of treatment. 
Other research could analyze which type of predictors are more suited for a matched care framework and to what extent accurate predictions can be made in treatment response.</p></sec><sec id="s4-4"><title>Conclusion</title><p>Machine learning models provide a robust and generalizable approach in automated risk signaling technology to identify cases at risk of poor treatment outcomes. The results of this study hold substantial implications for clinical practice by demonstrating that the performance of a model derived from one site is similar when applied to another site (ie, good external validation). This is a promising result for the scalability of machine learning models developed in single-center studies. Our findings confirm that routine monitoring provides valuable information that can be used in prognostic models to predict treatment outcomes. Such prognostic models can be used as complementary tools for practitioners in a stepped care framework.</p></sec></sec></body><back><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb2">LASSO</term><def><p>least absolute shrinkage and selection operator</p></def></def-item><def-item><term id="abb3">NPV</term><def><p>negative predictive value</p></def></def-item><def-item><term id="abb4">OQ-45.2</term><def><p>Outcome Questionnaire</p></def></def-item><def-item><term id="abb5">PPV</term><def><p>positive predictive value</p></def></def-item><def-item><term id="abb6">ROM</term><def><p>routine outcome monitoring</p></def></def-item><def-item><term id="abb7">TRIPOD</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Janssen</surname><given-names>R</given-names></name><name name-style="western"><surname>Busschbach</surname><given-names>J</given-names></name></person-group><article-title>Op weg naar gepaste geestelijke gezondheidszorg</article-title><source>Economisch Statistische Berichten</source><year>2012</year><volume>97</volume><fpage>81</fpage><lpage>86</lpage></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fernandes</surname><given-names>BS</given-names></name><name name-style="western"><surname>Williams</surname><given-names>LM</given-names></name><name name-style="western"><surname>Steiner</surname><given-names>J</given-names></name><name name-style="western"><surname>Leboyer</surname><given-names>M</given-names></name><name name-style="western"><surname>Carvalho</surname><given-names>AF</given-names></name><name name-style="western"><surname>Berk</surname><given-names>M</given-names></name></person-group><article-title>The new field of 'precision psychiatry'.</article-title><source>BMC Med</source><year>2017</year><month>04</month><day>13</day><volume>15</volume><issue>1</issue><fpage>80</fpage><pub-id pub-id-type="doi">10.1186/s12916-017-0849-x</pub-id><pub-id pub-id-type="medline">28403846</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gillan</surname><given-names>CM</given-names></name><name name-style="western"><surname>Whelan</surname><given-names>R</given-names></name></person-group><article-title>What big data can do for treatment in psychiatry</article-title><source>Curr Opin Behav Sci</source><year>2017</year><month>12</month><volume>18</volume><fpage>34</fpage><lpage>42</lpage><pub-id 
pub-id-type="doi">10.1016/j.cobeha.2017.07.003</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rush</surname><given-names>AJ</given-names></name><name name-style="western"><surname>Trivedi</surname><given-names>MH</given-names></name><name name-style="western"><surname>Wisniewski</surname><given-names>SR</given-names></name><name name-style="western"><surname>Nierenberg</surname><given-names>AA</given-names></name><name name-style="western"><surname>Stewart</surname><given-names>JW</given-names></name><name name-style="western"><surname>Warden</surname><given-names>D</given-names></name><etal/></person-group><article-title>Acute and longer-term outcomes in depressed outpatients requiring one or several treatment steps: a STAR*D report</article-title><source>Am J Psychiatry</source><year>2006</year><month>11</month><volume>163</volume><issue>11</issue><fpage>1905</fpage><lpage>1917</lpage><pub-id pub-id-type="doi">10.1176/ajp.2006.163.11.1905</pub-id><pub-id pub-id-type="medline">17074942</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Von Korff</surname><given-names>M</given-names></name><name name-style="western"><surname>Tiemens</surname><given-names>B</given-names></name></person-group><article-title>Individualized stepped care of chronic illness</article-title><source>West J Med</source><year>2000</year><month>02</month><volume>172</volume><issue>2</issue><fpage>133</fpage><lpage>137</lpage><pub-id pub-id-type="doi">10.1136/ewjm.172.2.133</pub-id><pub-id pub-id-type="medline">10693379</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van 
Orden</surname><given-names>M</given-names></name><name name-style="western"><surname>Hoffman</surname><given-names>T</given-names></name><name name-style="western"><surname>Haffmans</surname><given-names>J</given-names></name><name name-style="western"><surname>Spinhoven</surname><given-names>P</given-names></name><name name-style="western"><surname>Hoencamp</surname><given-names>E</given-names></name></person-group><article-title>Collaborative mental health care versus care as usual in a primary care setting: a randomized controlled trial</article-title><source>Psychiatr Serv</source><year>2009</year><month>01</month><volume>60</volume><issue>1</issue><fpage>74</fpage><lpage>79</lpage><pub-id pub-id-type="doi">10.1176/ps.2009.60.1.74</pub-id><pub-id pub-id-type="medline">19114574</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Delgadillo</surname><given-names>J</given-names></name><name name-style="western"><surname>de Jong</surname><given-names>K</given-names></name><name name-style="western"><surname>Lucock</surname><given-names>M</given-names></name><name name-style="western"><surname>Lutz</surname><given-names>W</given-names></name><name name-style="western"><surname>Rubel</surname><given-names>J</given-names></name><name name-style="western"><surname>Gilbody</surname><given-names>S</given-names></name><etal/></person-group><article-title>Feedback-informed treatment versus usual psychological treatment for depression and anxiety: a multisite, open-label, cluster randomised controlled trial</article-title><source>Lancet Psychiatry</source><year>2018</year><month>06</month><day>21</day><volume>5</volume><issue>7</issue><fpage>564</fpage><lpage>572</lpage><pub-id pub-id-type="doi">10.1016/S2215-0366(18)30162-7</pub-id><pub-id pub-id-type="medline">29937396</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lutz</surname><given-names>W</given-names></name><name name-style="western"><surname>Hofmann</surname><given-names>SG</given-names></name><name name-style="western"><surname>Rubel</surname><given-names>J</given-names></name><name name-style="western"><surname>Boswell</surname><given-names>JF</given-names></name><name name-style="western"><surname>Shear</surname><given-names>MK</given-names></name><name name-style="western"><surname>Gorman</surname><given-names>JM</given-names></name><etal/></person-group><article-title>Patterns of early change and their relationship to outcome and early treatment termination in patients with panic disorder</article-title><source>J Consult Clin Psychol</source><year>2014</year><month>04</month><volume>82</volume><issue>2</issue><fpage>287</fpage><lpage>297</lpage><pub-id pub-id-type="doi">10.1037/a0035535</pub-id><pub-id pub-id-type="medline">24447004</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Torous</surname><given-names>J</given-names></name><name name-style="western"><surname>Baker</surname><given-names>JT</given-names></name></person-group><article-title>Why psychiatry needs data science and data science needs psychiatry</article-title><source>JAMA Psychiatry</source><year>2016</year><month>01</month><volume>73</volume><issue>1</issue><fpage>3</fpage><lpage>4</lpage><pub-id pub-id-type="doi">10.1001/jamapsychiatry.2015.2622</pub-id><pub-id pub-id-type="medline">26676879</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McIntosh</surname><given-names>AM</given-names></name><name name-style="western"><surname>Stewart</surname><given-names>R</given-names></name><name 
name-style="western"><surname>John</surname><given-names>A</given-names></name><name name-style="western"><surname>Smith</surname><given-names>DJ</given-names></name><name name-style="western"><surname>Davis</surname><given-names>K</given-names></name><name name-style="western"><surname>Sudlow</surname><given-names>C</given-names></name><etal/></person-group><article-title>Data science for mental health: a UK perspective on a global challenge</article-title><source>Lancet Psychiatry</source><year>2016</year><month>10</month><volume>3</volume><issue>10</issue><fpage>993</fpage><lpage>998</lpage><pub-id pub-id-type="doi">10.1016/S2215-0366(16)30089-X</pub-id><pub-id pub-id-type="medline">27692269</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bzdok</surname><given-names>D</given-names></name><name name-style="western"><surname>Meyer-Lindenberg</surname><given-names>A</given-names></name></person-group><article-title>Machine learning for precision psychiatry: opportunities and challenges</article-title><source>Biol Psychiatry Cogn Neurosci Neuroimaging</source><year>2018</year><month>03</month><volume>3</volume><issue>3</issue><fpage>223</fpage><lpage>230</lpage><pub-id pub-id-type="doi">10.1016/j.bpsc.2017.11.007</pub-id><pub-id pub-id-type="medline">29486863</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chekroud</surname><given-names>AM</given-names></name><name name-style="western"><surname>Zotti</surname><given-names>RJ</given-names></name><name name-style="western"><surname>Shehzad</surname><given-names>Z</given-names></name><name name-style="western"><surname>Gueorguieva</surname><given-names>R</given-names></name><name 
name-style="western"><surname>Johnson</surname><given-names>MK</given-names></name><name name-style="western"><surname>Trivedi</surname><given-names>MH</given-names></name><etal/></person-group><article-title>Cross-trial prediction of treatment outcome in depression: a machine learning approach</article-title><source>Lancet Psychiatry</source><year>2016</year><month>03</month><volume>3</volume><issue>3</issue><fpage>243</fpage><lpage>250</lpage><pub-id pub-id-type="doi">10.1016/S2215-0366(15)00471-X</pub-id><pub-id pub-id-type="medline">26803397</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Koutsouleris</surname><given-names>N</given-names></name><name name-style="western"><surname>Kahn</surname><given-names>RS</given-names></name><name name-style="western"><surname>Chekroud</surname><given-names>AM</given-names></name><name name-style="western"><surname>Leucht</surname><given-names>S</given-names></name><name name-style="western"><surname>Falkai</surname><given-names>P</given-names></name><name name-style="western"><surname>Wobrock</surname><given-names>T</given-names></name><etal/></person-group><article-title>Multisite prediction of 4-week and 52-week treatment outcomes in patients with first-episode psychosis: a machine learning approach</article-title><source>Lancet Psychiatry</source><year>2016</year><month>10</month><volume>3</volume><issue>10</issue><fpage>935</fpage><lpage>946</lpage><pub-id pub-id-type="doi">10.1016/S2215-0366(16)30171-7</pub-id><pub-id pub-id-type="medline">27569526</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Iniesta</surname><given-names>R</given-names></name><name name-style="western"><surname>Malki</surname><given-names>K</given-names></name><name 
name-style="western"><surname>Maier</surname><given-names>W</given-names></name><name name-style="western"><surname>Rietschel</surname><given-names>M</given-names></name><name name-style="western"><surname>Mors</surname><given-names>O</given-names></name><name name-style="western"><surname>Hauser</surname><given-names>J</given-names></name><etal/></person-group><article-title>Combining clinical variables to optimize prediction of antidepressant treatment outcomes</article-title><source>J Psychiatr Res</source><year>2016</year><month>07</month><volume>78</volume><fpage>94</fpage><lpage>102</lpage><pub-id pub-id-type="doi">10.1016/j.jpsychires.2016.03.016</pub-id><pub-id pub-id-type="medline">27089522</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>Y</given-names></name><name name-style="western"><surname>Ragguett</surname><given-names>RM</given-names></name><name name-style="western"><surname>Mansur</surname><given-names>RB</given-names></name><name name-style="western"><surname>Boutilier</surname><given-names>JJ</given-names></name><name name-style="western"><surname>Rosenblat</surname><given-names>JD</given-names></name><name name-style="western"><surname>Trevizol</surname><given-names>A</given-names></name><etal/></person-group><article-title>Applications of machine learning algorithms to predict therapeutic outcomes in depression: a meta-analysis and systematic review</article-title><source>J Affect Disord</source><year>2018</year><month>12</month><day>1</day><volume>241</volume><fpage>519</fpage><lpage>532</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2018.08.073</pub-id><pub-id pub-id-type="medline">30153635</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Delgadillo</surname><given-names>J</given-names></name><name name-style="western"><surname>de Jong</surname><given-names>K</given-names></name><name name-style="western"><surname>Lucock</surname><given-names>M</given-names></name><name name-style="western"><surname>Lutz</surname><given-names>W</given-names></name><name name-style="western"><surname>Rubel</surname><given-names>J</given-names></name><name name-style="western"><surname>Gilbody</surname><given-names>S</given-names></name><etal/></person-group><article-title>Feedback-informed treatment versus usual psychological treatment for depression and anxiety: a multisite, open-label, cluster randomised controlled trial</article-title><source>Lancet Psychiatry</source><year>2018</year><month>06</month><day>21</day><volume>5</volume><issue>7</issue><fpage>564</fpage><lpage>572</lpage><pub-id pub-id-type="doi">10.1016/S2215-0366(18)30162-7</pub-id><pub-id pub-id-type="medline">29937396</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Graham</surname><given-names>S</given-names></name><name name-style="western"><surname>Depp</surname><given-names>C</given-names></name><name name-style="western"><surname>Lee</surname><given-names>EE</given-names></name><name name-style="western"><surname>Nebeker</surname><given-names>C</given-names></name><name name-style="western"><surname>Tu</surname><given-names>X</given-names></name><name name-style="western"><surname>Kim</surname><given-names>H-C</given-names></name><etal/></person-group><article-title>Artificial intelligence for mental health and mental illnesses: an overview</article-title><source>Curr Psychiatry Rep</source><year>2019</year><month>11</month><day>7</day><volume>21</volume><issue>11</issue><fpage>116</fpage><pub-id pub-id-type="doi">10.1007/s11920-019-1094-0</pub-id><pub-id 
pub-id-type="medline">31701320</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Freitas</surname><given-names>AA</given-names></name></person-group><article-title>Comprehensible classification models</article-title><source>ACM SIGKDD Explorations Newsletter</source><year>2014</year><month>03</month><day>17</day><volume>15</volume><issue>1</issue><fpage>1</fpage><lpage>10</lpage><pub-id pub-id-type="doi">10.1145/2594473.2594475</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Steyerberg</surname><given-names>EW</given-names></name><name name-style="western"><surname>Harrell</surname><given-names>FE</given-names></name></person-group><article-title>Prediction models need appropriate internal, internal-external, and external validation</article-title><source>J Clin Epidemiol</source><year>2016</year><month>01</month><volume>69</volume><fpage>245</fpage><lpage>247</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2015.04.005</pub-id><pub-id pub-id-type="medline">25981519</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kirtley</surname><given-names>OJ</given-names></name><name name-style="western"><surname>van Mens</surname><given-names>K</given-names></name><name name-style="western"><surname>Hoogendoorn</surname><given-names>M</given-names></name><name name-style="western"><surname>Kapur</surname><given-names>N</given-names></name><name name-style="western"><surname>de Beurs</surname><given-names>D</given-names></name></person-group><article-title>Translating promise into practice: a review of machine learning in suicide research and prevention</article-title><source>Lancet 
Psychiatry</source><year>2022</year><month>03</month><volume>9</volume><issue>3</issue><fpage>243</fpage><lpage>252</lpage><pub-id pub-id-type="doi">10.1016/S2215-0366(21)00254-6</pub-id><pub-id pub-id-type="medline">35183281</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Mens</surname><given-names>K</given-names></name><name name-style="western"><surname>Lokkerbol</surname><given-names>J</given-names></name><name name-style="western"><surname>Janssen</surname><given-names>R</given-names></name><name name-style="western"><surname>van Orden</surname><given-names>ML</given-names></name><name name-style="western"><surname>Kloos</surname><given-names>M</given-names></name><name name-style="western"><surname>Tiemens</surname><given-names>B</given-names></name></person-group><article-title>A cost-effectiveness analysis to evaluate a system change in mental healthcare in the Netherlands for patients with depression or anxiety</article-title><source>Adm Policy Ment Health</source><year>2018</year><month>07</month><volume>45</volume><issue>4</issue><fpage>530</fpage><lpage>537</lpage><pub-id pub-id-type="doi">10.1007/s10488-017-0842-x</pub-id><pub-id pub-id-type="medline">29247271</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Lambert</surname><given-names>M</given-names></name><name name-style="western"><surname>Morton</surname><given-names>J</given-names></name><name name-style="western"><surname>Hatfield</surname><given-names>D</given-names></name><name name-style="western"><surname>Harmon</surname><given-names>C</given-names></name><name name-style="western"><surname>Hamilton</surname><given-names>S</given-names></name><name 
name-style="western"><surname>Shimokawa</surname><given-names>K</given-names></name></person-group><source>Administration and Scoring Manual for the OQ-45.2 (Outcome Questionnaire). 3rd Edition</source><year>2004</year><publisher-loc>Orem, UT</publisher-loc><publisher-name>American Professional Credentialing Services</publisher-name></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Jong</surname><given-names>K</given-names></name><name name-style="western"><surname>Nugter</surname><given-names>MA</given-names></name><name name-style="western"><surname>Polak</surname><given-names>MG</given-names></name><name name-style="western"><surname>Wagenborg</surname><given-names>JEA</given-names></name><name name-style="western"><surname>Spinhoven</surname><given-names>P</given-names></name><name name-style="western"><surname>Heiser</surname><given-names>WJ</given-names></name></person-group><article-title>The Outcome Questionnaire (OQ-45) in a Dutch population: a cross-cultural validation</article-title><source>Clin Psychol Psychother</source><year>2007</year><month>08</month><day>6</day><volume>14</volume><issue>4</issue><fpage>288</fpage><lpage>301</lpage><pub-id pub-id-type="doi">10.1002/cpp.529</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jacobson</surname><given-names>NS</given-names></name><name name-style="western"><surname>Truax</surname><given-names>P</given-names></name></person-group><article-title>Clinical significance: a statistical approach to defining meaningful change in psychotherapy research</article-title><source>J Consult Clin Psychol</source><year>1991</year><month>02</month><volume>59</volume><issue>1</issue><fpage>12</fpage><lpage>19</lpage><pub-id 
pub-id-type="doi">10.1037//0022-006x.59.1.12</pub-id><pub-id pub-id-type="medline">2002127</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>J</given-names></name></person-group><source>Statistical Power Analysis for the Behavioral Sciences. 2nd Edition</source><year>1988</year><publisher-loc>Hillsdale, NJ</publisher-loc><publisher-name>Lawrence Erlbaum Associates</publisher-name><comment>ISBN: 1483276481</comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tibshirani</surname><given-names>R</given-names></name></person-group><article-title>Regression shrinkage and selection via the lasso</article-title><source>J Royal Stat Soc Ser B Stat Methodology</source><year>1996</year><month>01</month><volume>58</volume><issue>1</issue><fpage>267</fpage><lpage>288</lpage><pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>R Development Core Team</collab></person-group><source>R: A Language and Environment for Statistical Computing</source><year>2008</year><publisher-loc>Vienna, Austria</publisher-loc><publisher-name>R Foundation for Statistical Computing</publisher-name><comment>ISBN: 3-900051-07-0</comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kuhn</surname><given-names>M</given-names></name><name name-style="western"><surname>Johnson</surname><given-names>K</given-names></name></person-group><source>Applied Predictive Modeling</source><year>2013</year><publisher-loc>New York, 
NY</publisher-loc><publisher-name>Springer</publisher-name><comment>ISBN: 978-1-4614-6848-6</comment><pub-id pub-id-type="doi">10.1007/978-1-4614-6849-3</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Friedman</surname><given-names>J</given-names></name><name name-style="western"><surname>Hastie</surname><given-names>T</given-names></name><name name-style="western"><surname>Tibshirani</surname><given-names>R</given-names></name></person-group><article-title>Regularization paths for generalized linear models via coordinate descent</article-title><source>J Stat Softw</source><year>2010</year><volume>33</volume><issue>1</issue><fpage>1</fpage><lpage>22</lpage><pub-id pub-id-type="medline">20808728</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Robin</surname><given-names>X</given-names></name><name name-style="western"><surname>Turck</surname><given-names>N</given-names></name><name name-style="western"><surname>Hainard</surname><given-names>A</given-names></name><name name-style="western"><surname>Tiberti</surname><given-names>N</given-names></name><name name-style="western"><surname>Lisacek</surname><given-names>F</given-names></name><name name-style="western"><surname>Sanchez</surname><given-names>J-C</given-names></name><etal/></person-group><article-title>pROC: an open-source package for R and S+ to analyze and compare ROC curves</article-title><source>BMC Bioinformatics</source><year>2011</year><month>03</month><day>17</day><volume>12</volume><issue>1</issue><fpage>77</fpage><pub-id pub-id-type="doi">10.1186/1471-2105-12-77</pub-id><pub-id pub-id-type="medline">21414208</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><person-group 
person-group-type="author"><name name-style="western"><surname>Meindl</surname><given-names>MB</given-names></name><name name-style="western"><surname>Kowarik</surname><given-names>DIA</given-names></name><name name-style="western"><surname>Templ</surname><given-names>PM</given-names></name><name name-style="western"><surname>Templ</surname><given-names>M</given-names></name><name name-style="western"><surname>Meindl</surname><given-names>B</given-names></name><name name-style="western"><surname>Kowarik</surname><given-names>A</given-names></name></person-group><article-title>Introduction to statistical disclosure control (SDC)</article-title><source>International Household Survey Network</source><year>2018</year><access-date>2023-07-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ihsn.org/sites/default/files/resources/ihsn-working-paper-007-Oct27.pdf">www.ihsn.org/sites/default/files/resources/ihsn-working-paper-007-Oct27.pdf</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>F</given-names></name><name name-style="western"><surname>He</surname><given-names>H</given-names></name></person-group><article-title>Assessing the accuracy of diagnostic tests</article-title><source>Shanghai Arch Psychiatry</source><year>2018</year><month>06</month><day>25</day><volume>30</volume><issue>3</issue><fpage>207</fpage><lpage>212</lpage><pub-id pub-id-type="doi">10.11919/j.issn.1002-0829.218052</pub-id><pub-id pub-id-type="medline">30858674</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hilhorst</surname><given-names>L</given-names></name><name name-style="western"><surname>van der Stappen</surname><given-names>J</given-names></name><name 
name-style="western"><surname>Lokkerbol</surname><given-names>J</given-names></name><name name-style="western"><surname>Hiligsmann</surname><given-names>M</given-names></name><name name-style="western"><surname>Risseeuw</surname><given-names>AH</given-names></name><name name-style="western"><surname>Tiemens</surname><given-names>BG</given-names></name></person-group><article-title>Patients' and psychologists' preferences for feedback reports on expected mental health treatment outcomes: A discrete-choice experiment</article-title><source>Adm Policy Ment Health</source><year>2022</year><volume>49</volume><issue>5</issue><fpage>707</fpage><lpage>721</lpage><pub-id pub-id-type="doi">10.1007/s10488-022-01194-2</pub-id><pub-id pub-id-type="medline">35428931</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McMahon</surname><given-names>FJ</given-names></name></person-group><article-title>Prediction of treatment outcomes in psychiatry-where do we stand?</article-title><source>Dialogues Clin Neurosci</source><year>2014</year><month>12</month><volume>16</volume><issue>4</issue><fpage>455</fpage><lpage>464</lpage><pub-id pub-id-type="doi">10.31887/DCNS.2014.16.4/fmcmahon</pub-id><pub-id pub-id-type="medline">25733951</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lorenzo-Luaces</surname><given-names>L</given-names></name><name name-style="western"><surname>DeRubeis</surname><given-names>RJ</given-names></name><name name-style="western"><surname>van Straten</surname><given-names>A</given-names></name><name name-style="western"><surname>Tiemens</surname><given-names>B</given-names></name></person-group><article-title>A prognostic index (PI) as a moderator of outcomes in the treatment of depression: a proof of concept 
combining multiple variables to inform risk-stratified stepped care models</article-title><source>J Affect Disord</source><year>2017</year><month>04</month><day>15</day><volume>213</volume><fpage>78</fpage><lpage>85</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2017.02.010</pub-id><pub-id pub-id-type="medline">28199892</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Smeden</surname><given-names>M</given-names></name><name name-style="western"><surname>Moons</surname><given-names>KG</given-names></name><name name-style="western"><surname>de Groot</surname><given-names>JA</given-names></name><name name-style="western"><surname>Collins</surname><given-names>GS</given-names></name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names></name><name name-style="western"><surname>Eijkemans</surname><given-names>MJ</given-names></name><etal/></person-group><article-title>Sample size for binary logistic prediction models: beyond events per variable criteria</article-title><source>Stat Methods Med Res</source><year>2019</year><month>08</month><volume>28</volume><issue>8</issue><fpage>2455</fpage><lpage>2474</lpage><pub-id pub-id-type="doi">10.1177/0962280218784726</pub-id><pub-id pub-id-type="medline">29966490</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Confusion matrix results.</p><media xlink:href="medinform_v11i1e44322_app1.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material></app-group></back></article>