<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e64354</article-id><article-id pub-id-type="doi">10.2196/64354</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Imputation and Missing Indicators for Handling Missing Longitudinal Data: Data Simulation Analysis Based on Electronic Health Record Data</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Ehrig</surname><given-names>Molly</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bullock</surname><given-names>Garrett S</given-names></name><degrees>PhD, DPT</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Leng</surname><given-names>Xiaoyan Iris</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Pajewski</surname><given-names>Nicholas M</given-names></name><degrees>PhD</degrees><xref 
ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Speiser</surname><given-names>Jaime Lynn</given-names></name><degrees>MS, PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Department of Biostatistics and Data Science, Wake Forest University School of Medicine</institution><addr-line>Medical Center Blvd</addr-line><addr-line>Winston Salem</addr-line><addr-line>NC</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Lovis</surname><given-names>Christian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Snavely</surname><given-names>Anna</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Stein</surname><given-names>Maria</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Jaime Lynn Speiser, MS, PhD, Department of Biostatistics and Data Science, Wake Forest University School of Medicine, Medical Center Blvd, Winston Salem, NC, 27157, United States, 1 3367133469; <email>jspeiser@wakehealth.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>13</day><month>3</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e64354</elocation-id><history><date date-type="received"><day>15</day><month>07</month><year>2024</year></date><date date-type="rev-recd"><day>07</day><month>02</month><year>2025</year></date><date date-type="accepted"><day>08</day><month>02</month><year>2025</year></date></history><copyright-statement>&#x00A9; Molly Ehrig, Garrett S Bullock, Xiaoyan Iris Leng, Nicholas M Pajewski, Jaime Lynn Speiser. 
Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 13.3.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e64354"/><abstract><sec><title>Background</title><p>Missing data in electronic health records are highly prevalent and result in analytical concerns such as heterogeneous sources of bias and loss of statistical power. One simple analytic method for addressing missing or unknown covariate values is to treat missingness for a particular variable as a category unto itself, which we refer to as the missing indicator method.
For cross-sectional analyses, recent work suggested that there was minimal benefit to the missing indicator method; however, it is unclear how this approach performs in the setting of longitudinal data, in which correlation among clustered repeated measures may be leveraged for potentially improved model performance.</p></sec><sec><title>Objectives</title><p>This study aims to conduct a simulation study to evaluate whether the missing indicator method improved model performance and imputation accuracy for longitudinal data mimicking an application of developing a clinical prediction model for falls in older adults based on electronic health record data.</p></sec><sec sec-type="methods"><title>Methods</title><p>We simulated a longitudinal binary outcome using mixed effects logistic regression that emulated a falls assessment at annual follow-up visits. Using multivariate imputation by chained equations, we simulated time-invariant predictors such as sex and medical history, as well as dynamic predictors such as physical function, BMI, and medication use. We induced missing data in predictors under scenarios that had both random (missing at random) and dependent missingness (missing not at random). We evaluated aggregate performance using the area under the receiver operating characteristic curve (AUROC) for models with and without missing indicators as predictors, as well as complete case analysis, across simulation replicates. We evaluated imputation quality using normalized root-mean-square error for continuous variables and percent falsely classified for categorical variables.</p></sec><sec sec-type="results"><title>Results</title><p>Independent of the mechanism used to simulate missing data (missing at random or missing not at random), overall model performance via AUROC was similar regardless of whether missing indicators were included in the model.
The root-mean-square error and percent falsely classified measures were similar for models including missing indicators versus those with no missing indicators. Model performance and imputation quality were similar regardless of whether the outcome was related to missingness. Imputation with or with no missing indicators had similar mean values of AUROC compared with complete case analysis, although complete case analysis had the largest range of values.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The results of this study suggest that the inclusion of missing indicators in longitudinal data modeling neither improves nor worsens overall performance or imputation accuracy. Future research is needed to address whether the inclusion of missing indicators is useful in prediction modeling with longitudinal data in different settings, such as high dimensional data analysis.</p></sec></abstract><kwd-group><kwd>missing indicator method</kwd><kwd>missing data</kwd><kwd>imputation</kwd><kwd>longitudinal data</kwd><kwd>electronic health record data</kwd><kwd>electronic health records</kwd><kwd>EHR</kwd><kwd>simulation study</kwd><kwd>clinical prediction model</kwd><kwd>prediction model</kwd><kwd>older adults</kwd><kwd>falls</kwd><kwd>logistic regression</kwd><kwd>prediction modeling</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Electronic health record (EHR) data have many analytic uses, including patient monitoring, clinical decision support, quality improvement projects, and research initiatives [<xref ref-type="bibr" rid="ref1">1</xref>]. However, missing data are pervasive in EHRs because these systems were largely designed for the purposes of billing and because of the fragmented nature of health care in the United States where patients often use multiple health systems with disparate EHR systems. 
The incomplete nature of the EHR creates significant potential for bias for research studies leveraging real-world data [<xref ref-type="bibr" rid="ref2">2</xref>]. Statistically, missing data may be considered ignorable when they are missing completely at random (MCAR) or missing at random (MAR). A recent study illustrated that more than 1 missing mechanism may be present for EHR data, and the assumption that all missing data are MAR is generally not plausible [<xref ref-type="bibr" rid="ref3">3</xref>]. Recent work by Hu et al [<xref ref-type="bibr" rid="ref4">4</xref>] indicated that clinical EHR data were consistent with a mixture of random and nonrandom mechanisms. For example, a white blood cell count test was less likely to be ordered for patients who were clinically doing well (eg, lack of collection).</p><p>Current approaches to handle missing data include complete case analysis, imputation, and nonimputation approaches such as the use of missing indicators. These approaches vary in terms of their appropriateness depending on untestable assumptions about the mechanisms generating missing values. A detailed discussion of statistical approaches to handling missing data can be found in the TRIPOD (Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis) checklist [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Complete case analysis is a common method in which observations with missing values in any of the analysis variables are listwise deleted. If data are MCAR, complete case analysis may be appropriate, but if data are MAR or missing not at random (MNAR), complete case analysis can result in biased estimates. 
Independent of the missingness mechanism, complete case analysis results in a loss of statistical power by reducing the number of available observations [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>Imputation is another commonly used method for handling missing data and involves using observed data to estimate and fill in values that are missing, typically through regression approaches that model the variable with missingness as the outcome with the other variables in the dataset as predictors. While this method retains all observations in the dataset and reduces bias when data are MAR, regression imputation underestimates the SE of the model parameters and therefore overestimates precision [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Multiple imputation overcomes the limitations of regression imputation by generating multiple imputed values for each missing value. By separately analyzing each dataset and combining the outputs to obtain an overall point estimate and corresponding SE, variability estimates are more accurate and the analysis accounts for the uncertainty caused by missingness [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. However, the appropriate imputation strategy may depend on both the type of missingness and the objective of the analysis. One recent study has shown that regression imputation performs as well as multiple imputation when the ultimate goal is prediction rather than statistical inference or model interpretation [<xref ref-type="bibr" rid="ref11">11</xref>]. Another study found that for logistic regression, regression imputation was comparable with multiple imputation in terms of model performance with a low percentage of missingness [<xref ref-type="bibr" rid="ref12">12</xref>]. 
However, none of the imputation methods are unbiased or recommended for nonignorable missing data.</p><p>A third approach is the missing indicator method, which adds a binary predictor to the model that takes the value of 1 if the value of a certain variable is missing and zero if the value is not missing, therefore, taking advantage of the information contained in missingness itself [<xref ref-type="bibr" rid="ref13">13</xref>]. The use of missing indicators has been introduced as a method when missingness is informative, or when the presence or absence of missingness adds prognostic information to a model. Although this is a simple method for potentially leveraging information about missingness, it increases the number of predictor variables to be included, which may not be ideal for high-dimensional datasets, datasets with many predictors, or situations where significant model flexibility is desired (ie, semiparametric models that use basis functions or splines to flexibly model continuous predictors such as vital signs or laboratory values).</p><p>There is still a lack of consensus on the appropriateness of the missing indicator method for handling missing data for clinical prediction modeling [<xref ref-type="bibr" rid="ref14">14</xref>]. One concern is the creation of a negative feedback loop between the model and the providers using the model for decision support. When an individual knows that taking or not taking a certain measurement is informative, their decision to take the measurement could hypothetically be impacted [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>], or the model may simply reiterate a clinical suspicion or decision that has already occurred, such as a recent prediction model for the early detection of sepsis [<xref ref-type="bibr" rid="ref16">16</xref>]. An example of this is the decision to order certain specialized laboratory tests.
In addition, prediction models that use the missing indicator method must be consistently monitored and revised due to how quickly patient medical data and factors that affect physician decision-making change [<xref ref-type="bibr" rid="ref15">15</xref>]. However, other work has found that the missing indicator method could improve predictive performance [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. One study found that the addition of missing indicators, which signaled the presence or absence of a laboratory test result, to observed measurements improved area under the receiver operating characteristic curve (AUROC) when predicting clinical outcomes [<xref ref-type="bibr" rid="ref17">17</xref>]. Missing indicators have been shown to increase predictive performance when missingness is informative, with the effectiveness of the method increasing as the informativeness of missingness increased [<xref ref-type="bibr" rid="ref14">14</xref>]. The same study found that the missing indicator method did not harm predictive performance when missingness was uninformative. This is an important distinction, as it is not possible to empirically test whether missingness is informative [<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>There is currently a gap in knowledge regarding the effectiveness of including missing indicators in longitudinal data modeling, specifically whether missing indicators improve model performance and the quality of model-based imputations. The setting of longitudinal repeated measures and clustered data is an important context for the missing indicator method because the correlation within clusters may be leveraged to increase the imputation accuracy and model performance, particularly in the case of data that are MNAR. 
However, we are not aware of work that has investigated the missing indicator method in this setting.</p><p>We aimed to assess the missing indicator method for longitudinal, repeated-measures data using a simulation study mimicking real-world EHR data. In section 2, we detail the methods we used to generate the synthetic longitudinal data, including fixed and repeated measures of predictors for MAR and MNAR missing data patterns, and we define outcome metrics used to assess performance and imputation quality. In section 3, we present results aggregated across the simulation runs for models with and with no missing indicator variables. In section 4, we discuss the results and implications of the study, compare our study with prior studies, and consider the strengths and limitations of this work.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>This study is a simulation study in which missing indicator variables in imputation and modeling were evaluated under different missing data mechanisms (MAR and MNAR). We follow the simulation study guidelines suggested by Morris and colleagues [<xref ref-type="bibr" rid="ref19">19</xref>]. Analyses were performed with R (version 4.2.1; The R Project for Statistical Computing). All code is available on our GitHub repository [<xref ref-type="bibr" rid="ref20">20</xref>]. 
We use the following R packages in our analysis: <italic>bindata</italic> [<xref ref-type="bibr" rid="ref21">21</xref>], <italic>MASS</italic> [<xref ref-type="bibr" rid="ref22">22</xref>], <italic>tidyverse</italic> [<xref ref-type="bibr" rid="ref23">23</xref>], <italic>lme4</italic> [<xref ref-type="bibr" rid="ref24">24</xref>], <italic>lmerTest</italic> [<xref ref-type="bibr" rid="ref25">25</xref>], <italic>naniar</italic> [<xref ref-type="bibr" rid="ref26">26</xref>], <italic>mice</italic> [<xref ref-type="bibr" rid="ref27">27</xref>], <italic>broom.mixed</italic> [<xref ref-type="bibr" rid="ref28">28</xref>], <italic>pROC</italic> [<xref ref-type="bibr" rid="ref29">29</xref>], <italic>DescTools</italic> [<xref ref-type="bibr" rid="ref30">30</xref>], <italic>missForest</italic> [<xref ref-type="bibr" rid="ref31">31</xref>], <italic>table1</italic> [<xref ref-type="bibr" rid="ref32">32</xref>], <italic>flextable</italic> [<xref ref-type="bibr" rid="ref32">32</xref>], <italic>skimr</italic> [<xref ref-type="bibr" rid="ref32">32</xref>], <italic>sjPlot</italic> [<xref ref-type="bibr" rid="ref33">33</xref>], <italic>gridExtra</italic> [<xref ref-type="bibr" rid="ref34">34</xref>], <italic>grid</italic> [<xref ref-type="bibr" rid="ref35">35</xref>], and <italic>car</italic> [<xref ref-type="bibr" rid="ref36">36</xref>].</p></sec><sec id="s2-2"><title>Data-Generating Mechanisms</title><p>This study focuses on a mixed effects logistic regression model that uses a binary outcome simulated to represent whether or not patients experienced a fall since their last visit. A total of 250 patients were simulated, each with 5 visits. Medical history variables, demographic variables, fall-specific variables, and variables intended to add noise to the model were simulated. We simulated the data to represent EHR data that may be used to develop models for falls in older adults. 
Predictors of falls were simulated based on previous research [<xref ref-type="bibr" rid="ref37">37</xref>] and represent a combination of fixed, patient-level variables and visit-level variables that are collected repeatedly. The fixed variables included sex and comorbidities (diabetes, dementia, hypertension, and urinary incontinence), all of which may be related to falls in older adults. The visit-level variables included BMI, gait speed, single-leg balance, and use of medications (pain or depression), again representing variables that could be associated with falls in older adults. <xref ref-type="table" rid="table1">Table 1</xref> lists all variables in the dataset and describes how they were simulated. We include summaries of the variables for one of the simulated datasets in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. For more details, including parameter values, see the code on GitHub.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Variable list and description.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variables</td><td align="left" valign="bottom">Data generation and description</td></tr></thead><tbody><tr><td align="left" valign="top">Patient-level variables</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Birth sex, diabetes, dementia, hypertension, and urinary incontinence</td><td align="left" valign="top">Binary random variables simulated with the bindata R package</td></tr><tr><td align="left" valign="top">&#x2003;Age</td><td align="left" valign="top">Continuous with mean dependent on number of chronic conditions (ie, number of the following conditions: diabetes, dementia, hypertension, and urinary incontinence)</td></tr><tr><td align="left" valign="top">Visit-level variables</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Visit</td><td align="left" 
valign="top">Discrete, 5 visits for each patient.</td></tr><tr><td align="left" valign="top">&#x2003;BMI</td><td align="left" valign="top">Continuous, simulated with a linear mixed effects model with age, diabetes, hypertension, and birth sex as predictors. Random intercept for patient ID and random error included.</td></tr><tr><td align="left" valign="top">&#x2003;Gait speed</td><td align="left" valign="top">Continuous, simulated with a linear mixed effects model with age, BMI, diabetes, and birth sex as predictors. Random intercept for patient ID and random error included.</td></tr><tr><td align="left" valign="top">&#x2003;Single-leg balance</td><td align="left" valign="top">Continuous, simulated with a linear mixed effects model with age, BMI, diabetes, dementia, and birth sex as predictors. Random intercept for patient ID and random error included.</td></tr><tr><td align="left" valign="top">&#x2003;Pain medication</td><td align="left" valign="top">Binary, probability simulated with expit function with age, sex, and diabetes as predictors in the model. A random intercept for patient was included in the model. The probability was then used to simulate a Bernoulli random variable where:<break/>0=did not take pain medication since last visit<break/>1=took pain medication since last visit</td></tr><tr><td align="left" valign="top">&#x2003;Depression medication</td><td align="left" valign="top">Binary, probability simulated with expit function with age, sex, and dementia as predictors in the model. A random intercept for patient ID was included in the model. 
The probability was then used to simulate a Bernoulli random variable where:<break/>0 = did not take depression medication since last visit<break/>1 = took depression medication since last visit</td></tr><tr><td align="left" valign="top">&#x2003;Junk 1&#x2010;5</td><td align="left" valign="top">Continuous random variables with means and SDs chosen at random.</td></tr><tr><td align="left" valign="top">&#x2003;Y</td><td align="left" valign="top">Binary outcome variable, probability simulated with expit function with all variables except the junk variables and visit as predictors. A random intercept for patient ID was also included in the model. The probability was simulated with and with no missing indicators included in the model. The probability was then used to simulate a Bernoulli random variable where:<break/>0 = did not fall since last visit<break/>1 = fall since last visit</td></tr></tbody></table></table-wrap><p>The probability of the binary outcome was simulated in 2 different ways using the expit function. For both versions, the model included a random intercept for patient, and all variables except the junk variables, patient ID, and visit were included as predictors. The first version included missing indicator variables as predictors in the model, while the second did not. The outcome was a random Bernoulli variable with the probability of being one equal to the calculated probability for each visit. A total of 250 iterations were run, so 250 different datasets were created.</p><p>Missingness was induced for the visit-level continuous variables gait speed and single-leg balance, and for the binary variables pain medication and depression medication. Overall missing data percentages of 20% and 50% were simulated. Under the assumption of MAR, the probability that gait speed, single-leg balance, pain medication, and depression medication were missing for a specific visit was dependent on age, BMI, diabetes, and urinary incontinence. 
Specifically, the probability each variable was missing was simulated with the expit function where age, BMI, diabetes, and urinary incontinence were included as predictors. The intercept was changed to achieve different percentages of missing data. The probability was higher for older patients, patients with a larger BMI, and patients with diabetes or urinary incontinence. Missing indicators were created by defining Bernoulli random variables with the probability of being one equal to the probability of being missing, and indicator variables were created for each of the 4 variables. If the missing indicator was 1, the value of the corresponding variable was set to missing. Therefore, although all 4 variables had the same probability of being missing for each visit, different combinations of variables could be missing at each visit.</p><p>Under the MNAR missingness mechanism, the probability that gait speed, single-leg balance, pain medication, and depression medication were missing for a specific visit was dependent on the value of the variable itself. For gait speed and single-leg balance, if the value of the variable at a visit was less than the 25th percentile, the probability of the value being set to missing was .7 to target an overall missing percentage of 50% and .3 to target an overall missing percentage of 20%. Otherwise, the probability was zero. For pain medication and depression medication, if the value of the variable was 1 at a visit (indicating that the patient was taking the medication), the probability the value was set to missing was .4 to target an overall missing percentage of 50% and .1 to target an overall missing percentage of 20%. Otherwise, the probability was zero. Therefore, lower values of gait speed and single-leg balance were more likely to be missing. Similarly, if patients were taking pain medication or depression medication, these values were more likely to be missing. 
For all of the simulated scenarios, the outcome, all patient-level variables, and the remaining visit-level variable (BMI) were fully observed.</p></sec><sec id="s2-3"><title>Missing Data-Handling Strategies</title><p>Multivariate imputation via chained equations was performed using the <italic>mice</italic> package in R [<xref ref-type="bibr" rid="ref27">27</xref>]. Regression imputation was performed using single imputation (ie, multiple imputation was not used because the purpose of the model is prediction). All variables in the dataset were included in the imputation model, including the outcome variable. The 2-level structure of the dataset was specified in the imputation model by denoting patient as the clustering variable. To impute gait speed and single-leg balance, a 2-level normal model was used. Values below zero were capped at zero. To impute pain and depression medication, a 2-level logistic model was used. When imputing a variable, the indicator for that variable was not included in the imputation model because in imputation only present data are used and the value of the indicator is 0 for all present data. The indicators for the other variables were included in the imputation model.</p><p>The outcome was calculated with a mixed effects logistic regression for both analyses that included and did not include missing indicator variables. All variables except visit were included in the model as predictors, and a random intercept for patient was also included in the model. Missing indicators were included in the model when they had also been included in the imputation model. The junk variables were included with the expectation that they would not be significant in the model. Complete case analysis was performed by deleting all observations with missing values prior to running the model.
A summary of the different scenarios and models run is shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Summary of Modeling.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Outcome simulation and missing data mechanism</td><td align="left" valign="bottom">Target missing percentage</td><td align="left" valign="bottom">Imputation and modeling strategy</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Missing indicators included in model for outcome simulation</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">MAR<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="char" char="." valign="top">20</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Missing indicators included in imputation and modeling</p></list-item><list-item><p>No missing indicators included in imputation and modeling</p></list-item><list-item><p>Complete case analysis</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">MAR</td><td align="char" char="." valign="top">50</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Missing indicators included in imputation and modeling</p></list-item><list-item><p>No missing indicators included in imputation and modeling</p></list-item><list-item><p>Complete case analysis</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">MNAR<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="char" char="." 
valign="top">20</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Missing indicators included in imputation and modeling</p></list-item><list-item><p>No missing indicators included in imputation and modeling</p></list-item><list-item><p>Complete case analysis</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">MNAR</td><td align="char" char="." valign="top">50</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Missing indicators included in imputation and modeling</p></list-item><list-item><p>No missing indicators included in imputation and modeling</p></list-item><list-item><p>Complete case analysis</p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="4">No missing indicators included in model for outcome simulation</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">MAR</td><td align="char" char="." valign="top">20</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Missing indicators included in imputation and modeling</p></list-item><list-item><p>No missing indicators included in imputation and modeling</p></list-item><list-item><p>Complete case analysis</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">MAR</td><td align="char" char="." valign="top">50</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Missing indicators included in imputation and modeling</p></list-item><list-item><p>No missing indicators included in imputation and modeling</p></list-item><list-item><p>Complete case analysis</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">MNAR</td><td align="char" char="." 
valign="top">20</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Missing indicators included in imputation and modeling</p></list-item><list-item><p>No missing indicators included in imputation and modeling</p></list-item><list-item><p>Complete case analysis</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">MNAR</td><td align="char" char="." valign="top">50</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Missing indicators included in imputation and modeling</p></list-item><list-item><p>No missing indicators included in imputation and modeling</p></list-item><list-item><p>Complete case analysis</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>MAR: missing at random.</p></fn><fn id="table2fn2"><p><sup>b</sup>MNAR: missing not at random.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-4"><title>Performance Metrics</title><p>We assessed models in terms of performance and imputation quality. To assess model performance, AUROC was calculated. To assess imputation quality for binary variables, the proportion of falsely classified imputations (PFC) was calculated [<xref ref-type="bibr" rid="ref38">38</xref>], defined as the number of incorrect binary imputed values divided by the total number of imputed values. Lower proportions indicate better quality of imputations. To assess imputation quality for continuous variables, the normalized root-mean-square error (NRMSE) between the imputed values and the observed values was calculated. The root-mean-square error is normalized by dividing by the SD of the observed values (from the study by Stekhoven and B&#x00FC;hlmann [<xref ref-type="bibr" rid="ref38">38</xref>]). Lower NRMSE indicates better imputation quality. 
For each iteration and scenario, the PFC, NRMSE, and AUROC were stored and the average values were calculated across the simulation runs.</p></sec><sec id="s2-5"><title>Simulation Study Analysis Pipeline</title><p><xref ref-type="fig" rid="figure1">Figure 1</xref> provides an overview of the simulation and analysis performed in this study. The first key step is data generation, with patient-level variables generated first, then visit-level variables, and finally the outcome under the 2 underlined scenarios. Missingness is then induced under different mechanisms and at different percentages, and imputation occurs with and without the missing indicators in the imputation model. After the calculation of evaluation metrics, models were run with and without the missing indicators as predictors in the model&#x2014;along with complete-case analysis&#x2014;and the AUROC of each model was extracted. For models using imputation, NRMSE was calculated for continuous variables and PFC was calculated for binary variables. Results for each run of the simulation were aggregated, and averages of the performance metrics are presented.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Data pipeline flowchart. AUC: area under the receiver operating characteristic curve; MAR: missing at random; MNAR: missing not at random.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e64354_fig01.png"/></fig></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p><xref ref-type="table" rid="table3">Table 3</xref> shows the average overall percentage of missing data and the SD for each scenario under the different missing mechanisms and data-generating mechanisms. 
For all scenarios, the actual missing percentage of data was slightly higher than the targeted amount.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Missingness Percentages.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Outcome simulation and missing data mechanism</td><td align="left" valign="bottom">Target missing percentage</td><td align="left" valign="bottom">Actual missing percentage, mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Indicators included in model for outcome simulation</td></tr><tr><td align="left" valign="top" rowspan="4"/><td align="left" valign="top">MAR<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">20</td><td align="left" valign="top">22.53 (1.09)</td></tr><tr><td align="left" valign="top">MAR</td><td align="left" valign="top">50</td><td align="left" valign="top">52.18 (1.33)</td></tr><tr><td align="left" valign="top">MNAR<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">20</td><td align="left" valign="top">22.20 (1.08)</td></tr><tr><td align="left" valign="top">MNAR</td><td align="left" valign="top">50</td><td align="left" valign="top">54.28 (1.33)</td></tr><tr><td align="left" valign="top" colspan="4">Indicators not included in model for outcome simulation</td></tr><tr><td align="left" valign="top" rowspan="4"/><td align="left" valign="top">MAR</td><td align="left" valign="top">20</td><td align="left" valign="top">22.50 (1.06)</td></tr><tr><td align="left" valign="top">MAR</td><td align="left" valign="top">50</td><td align="left" valign="top">52.34 (1.27)</td></tr><tr><td align="left" valign="top">MNAR</td><td align="left" valign="top">20</td><td align="left" valign="top">22.35 (1.07)</td></tr><tr><td align="left" valign="top">MNAR</td><td align="left" valign="top">50</td><td align="left" valign="top">54.29 
(1.33)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>MAR: missing at random.</p></fn><fn id="table3fn2"><p><sup>b</sup>MNAR: missing not at random.</p></fn></table-wrap-foot></table-wrap><sec id="s3-1"><title>Imputation Quality</title><p>First, we present results related to imputation quality. We begin by assessing the PFC for the binary variables (<xref ref-type="fig" rid="figure2">Figure 2</xref>), in which higher PFC indicates a higher misclassification rate and therefore worse imputation quality. For MAR scenarios (<xref ref-type="fig" rid="figure2">Figure 2A and B</xref>), the PFC was about 46%&#x2010;47% for both pain and depression medication, regardless of whether indicators were used to simulate the outcome. There was little difference between the PFC at 20% of missing data compared with 50% of missing data. For MNAR scenarios (<xref ref-type="fig" rid="figure2">Figure 2C and D</xref>), the PFC was about 61%&#x2010;63% for both pain and depression medication at 50% of missing data and about 52%&#x2010;53% for 20% of missing data. PFCs were similar whether or not missing indicators were included. The PFCs were higher for MNAR data than for MAR data.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Average proportion of falsely classified imputations (PFC) for binary variables across iterations. The average value is indicated with a point, and the lines go to the 2.5th and 97.5th percentiles. Panels A and B are for MAR data, when indicators are not included in the outcome simulation and when indicators are included in the outcome simulation. Panels C and D are for MNAR data, when indicators are not included in the outcome simulation and when indicators are included in the outcome simulation. 
MAR: missing at random; MNAR: missing not at random.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e64354_fig02.png"/></fig><p>Next, we assess NRMSE for continuous variables (<xref ref-type="fig" rid="figure3">Figure 3A-D</xref>), in which higher NRMSE indicates worse imputation quality. In general, the NRMSE of single-leg balance was lower than that of gait speed. For the variables gait speed and single-leg balance, NRMSE was higher when there was 50% of missing data compared with 20% of missing data. NRMSE was higher in MNAR scenarios compared with MAR scenarios. Whether or not indicators were included when simulating the outcome resulted in similar NRMSE for the variables. The NRMSE for the imputation of gait speed was slightly larger when indicators were included for all scenarios, but for single-leg balance there was no clear pattern.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Average normalized root-mean-square error (NRMSE) for continuous variables across iterations. The average value is indicated with a point, and the lines go to the 2.5th and 97.5th percentiles. Panels A and B are for MAR data, when indicators are not included in the outcome simulation and when indicators are included in the outcome simulation. Panels C and D are for MNAR data, when indicators are not included in the outcome simulation and when indicators are included in the outcome simulation. GS: gait speed; MAR: missing at random; MNAR: missing not at random.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e64354_fig03.png"/></fig></sec><sec id="s3-2"><title>Performance Evaluation</title><p>We compare AUROC values for complete case analysis with the imputation methods (indicators included vs not included) in <xref ref-type="fig" rid="figure4">Figure 4A-D</xref>. 
AUROCs for the methods within a simulated scenario were generally similar and close to 0.75. The complete case analysis had the largest spread of AUROC values, whereas imputation with or without missing indicators had a similar spread of AUROC values. The amount of missing data (20% or 50%) and the missing data assumption (MAR and MNAR) did not substantially impact the AUROC values, which were similar across these groups. Comparing models using missing indicators with those with no missing indicators, we observed overlap in the AUROC intervals.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Average AUC comparison across iterations. The average value is indicated with a point, and the lines go to the 2.5th and 97.5th percentiles. Panels A and B are for MAR data, when indicators are not included in the outcome simulation and when indicators are included in the outcome simulation. Panels C and D are for MNAR data, when indicators are not included in the outcome simulation and when indicators are included in the outcome simulation. AUC: area under the receiver operating characteristic curve; MAR: missing at random; MNAR: missing not at random.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e64354_fig04.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>This study investigated the performance of the missing indicator method in terms of imputation quality and model performance for longitudinal data under MAR and MNAR mechanisms and different amounts of missing data. The imputation quality was worse under MNAR, as the PFC was about 15% higher under MNAR and the NRMSE for continuous variables was higher under MNAR. Whether data were MAR or MNAR, the inclusion of missing indicators in the imputation and outcome models had a minimal effect on AUROC, regardless of whether the indicators were included as inputs when simulating the outcome. 
Therefore, the results from our simulation of longitudinal data mimicking data from the EHR suggest that the missing indicator method may not improve imputation quality or model performance, even when data are MNAR. However, it does not seem that including missing indicators harms imputation quality or model performance either.</p><p>In all scenarios, AUROC from complete case analysis was similar to AUROC from the other models, but the range of values was largest. While complete case analysis had similar AUROC values to imputation, we would not generally advocate for the use of complete case analysis. The increased variability associated with complete case analysis compared with imputation approaches can result in loss of power. In addition, while this study is focused on prediction and does not report model parameter estimates, complete case analysis may result in biased model coefficient estimates when data are MAR or MNAR. If model interpretation is of interest, complete case analysis will likely result in bias in settings such as EHR data, where missingness is often informative.</p><p>It was somewhat surprising that the imputations for the simulated binary variables were poor, as demonstrated by the high rates of PFC in <xref ref-type="fig" rid="figure2">Figure 2</xref>. We were not expecting such high errors in the imputed values. We hypothesize that some of the error may be attributed to rounding to force the imputed values to be binary, as many imputation methods provide a probability for binary variables which then must be handled in the analysis. Although the accuracy of the binary variable imputations was poor in our simulations, our main focus was on whether or not missing indicators may be beneficial for imputation and modeling. 
Future work may investigate the accuracy of imputation methods for multilevel data, especially when the predictors contain a mix of binary and continuous variables.</p><p>Previous studies on the missing indicator method have shown conflicting results. Van Ness et al [<xref ref-type="bibr" rid="ref14">14</xref>] found that when missingness is informative, the missing indicator method increases predictive performance of linear models and neural networks with mean imputation and other imputation methods. The authors simulated data using an informativeness parameter, which differs from our study. The only situation where the method harmed predictive performance was in high-dimensional data, where the addition of uninformative indicators led to overfitting. Sperrin and Martin [<xref ref-type="bibr" rid="ref39">39</xref>] found that the method improves causal effect estimation when missingness is informative when combined with multiple imputation. Sisk et al [<xref ref-type="bibr" rid="ref11">11</xref>] investigated the use of the missing indicator method in addition to both regression and multiple imputation to deal with nonignorable missing data in prediction modeling. Similar to Van Ness et al [<xref ref-type="bibr" rid="ref14">14</xref>], Sisk et al [<xref ref-type="bibr" rid="ref11">11</xref>] showed that the missing indicator method corrected bias but requires the assumption that the missing mechanism remains constant throughout the clinical prediction model pipeline, which may not be plausible because of how the likelihood of collection differs across providers.</p><p>The results of our study contribute to the growing body of literature aiming to provide guidance regarding the missing indicator method. Our simulation based on EHR data of falls in older adults using a longitudinal, repeated-measures setup suggested that the missing indicator method may not be beneficial in terms of imputation quality or model performance, but it also did not seem to cause harm. 
None of the previously described papers used longitudinal data when investigating the missing indicator method with a focus on prediction modeling, which may be a reason why the results of this paper differ from findings of the other papers mentioned. There is clearly debate as to the potential benefit and harm of the missing indicator method, and this paper provides guidance for longitudinal, repeated-measures data.</p><p>Our study should be considered within the context of its strengths and limitations. We used a simulation framework, which has multiple advantages that allow for the evaluation of statistical methods. A major strength of this study is the ability to define and control the missing mechanism. In practice, investigators can make assumptions regarding why data are missing, but there is no statistical test to decide whether data are MAR or MNAR. In this study, because the truth regarding the missing mechanism for each variable is known, no assumptions are made. The effectiveness of the missing indicator method can be evaluated and compared between the 2 mechanisms. In addition, because the true value of all variables is known, the imputations themselves can be evaluated for quality.</p><p>Despite the many strengths of our study, there are some limitations. One limitation of this study was the quality of imputations for the binary variables. With 45%&#x2010;60% of values being incorrectly classified, the imputation performed only slightly better than random guessing. This may have impacted how beneficial the missing indicators were in modeling. A future study could investigate how to boost imputation performance in longitudinal data, perhaps using machine learning imputation methods. Another limitation of the study is that time-dependent covariates were not considered. Future work may investigate the missing indicator method in this setting. Other limitations are related to the nature of simulation studies. 
Assumptions about the relationships between variables must be made, and these relationships are often oversimplified. The results may be sensitive to the parameter values chosen for the study; however, we completed a rigorous study based on a real-world scenario of falls in older adults. Future studies could evaluate how the addition of more visits, missed visits, dropout, and other missing patterns common in EHR data impacts results. In addition, a future simulation study could use an informativeness missing parameter such as that imposed in Van Ness&#x2019; analysis for MNAR scenarios.</p><p>The results of this study suggest that the inclusion of missing indicators in longitudinal data modeling does not seem to be beneficial for overall performance or imputation accuracy, as neither metric improved. However, inclusion of missing indicators does not appear to cause harm in terms of performance or imputation accuracy, as neither metric worsened. Future research may address whether the inclusion of missing indicators is useful in prediction modeling with longitudinal data in different settings, such as high-dimensional data analysis.</p></sec></body><back><ack><p>This project was supported in part by the National Institutes of Health/National Library of Medicine (R25 LM014214) in the Department of Biomedical Engineering and Center for Biomedical Informatics at Wake Forest University School of Medicine. JLS is supported by the National Institute on Aging of the National Institutes of Health under award number K25AG068253. This study was supported in part by the Wake Forest Claude D. Pepper Older Americans Independence Centers (P30 AG021332). 
The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb2">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb3">MAR</term><def><p>missing at random</p></def></def-item><def-item><term id="abb4">MCAR</term><def><p>missing completely at random</p></def></def-item><def-item><term id="abb5">MNAR</term><def><p>missing not at random</p></def></def-item><def-item><term id="abb6">NRMSE</term><def><p>normalized root-mean-square error</p></def></def-item><def-item><term id="abb7">PFC</term><def><p> proportion of falsely classified imputations</p></def></def-item><def-item><term id="abb8">TRIPOD</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Ehrenstein</surname><given-names>V</given-names> </name><name name-style="western"><surname>Kharrazi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Lehmann</surname><given-names>H</given-names> </name><name name-style="western"><surname>Taylor</surname><given-names>CO</given-names> </name></person-group><article-title>Obtaining data from electronic health records</article-title><source>Tools and Technologies for Registry Interoperability, Registries for Evaluating Patient Outcomes: A User&#x2019;s Guide</source><year>2019</year><edition>3</edition><publisher-name>Agency for Healthcare Research and 
Quality</publisher-name></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wells</surname><given-names>BJ</given-names> </name><name name-style="western"><surname>Chagin</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Nowacki</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Kattan</surname><given-names>MW</given-names> </name></person-group><article-title>Strategies for handling missing data in electronic health record derived data</article-title><source>EGEMS (Wash DC)</source><year>2013</year><volume>1</volume><issue>3</issue><fpage>1035</fpage><pub-id pub-id-type="doi">10.13063/2327-9214.1035</pub-id><pub-id pub-id-type="medline">25848578</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haneuse</surname><given-names>S</given-names> </name><name name-style="western"><surname>Arterburn</surname><given-names>D</given-names> </name><name name-style="western"><surname>Daniels</surname><given-names>MJ</given-names> </name></person-group><article-title>Assessing missing data assumptions in EHR-based studies: a complex and underappreciated task</article-title><source>JAMA Netw Open</source><year>2021</year><month>02</month><day>1</day><volume>4</volume><issue>2</issue><fpage>e210184</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.0184</pub-id><pub-id pub-id-type="medline">33635321</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Melton</surname><given-names>GB</given-names> </name><name 
name-style="western"><surname>Arsoniadis</surname><given-names>EG</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kwaan</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Simon</surname><given-names>GJ</given-names> </name></person-group><article-title>Strategies for handling missing clinical data for automated surgical site infection detection from the electronic health record</article-title><source>J Biomed Inform</source><year>2017</year><month>04</month><volume>68</volume><fpage>112</fpage><lpage>120</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2017.03.009</pub-id><pub-id pub-id-type="medline">28323112</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><name name-style="western"><surname>Dhiman</surname><given-names>P</given-names> </name><etal/></person-group><article-title>TRIPOD+AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods</article-title><source>BMJ</source><year>2024</year><month>04</month><day>16</day><volume>385</volume><fpage>e078378</fpage><pub-id pub-id-type="doi">10.1136/bmj-2023-078378</pub-id><pub-id pub-id-type="medline">38626948</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Reitsma</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><name 
name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name></person-group><article-title>Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis (TRIPOD): the TRIPOD Statement</article-title><source>Br J Surg</source><year>2015</year><month>02</month><volume>102</volume><issue>3</issue><fpage>148</fpage><lpage>158</lpage><pub-id pub-id-type="doi">10.1002/bjs.9736</pub-id><pub-id pub-id-type="medline">25627261</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Papageorgiou</surname><given-names>G</given-names> </name><name name-style="western"><surname>Grant</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Takkenberg</surname><given-names>JJM</given-names> </name><name name-style="western"><surname>Mokhles</surname><given-names>MM</given-names> </name></person-group><article-title>Statistical primer: how to deal with missing data in scientific research?</article-title><source>Interact Cardiovasc Thorac Surg</source><year>2018</year><month>08</month><day>1</day><volume>27</volume><issue>2</issue><fpage>153</fpage><lpage>158</lpage><pub-id pub-id-type="doi">10.1093/icvts/ivy102</pub-id><pub-id pub-id-type="medline">29757374</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name></person-group><article-title>Missing data imputation: focusing on single imputation</article-title><source>Ann Transl Med</source><year>2016</year><month>01</month><volume>4</volume><issue>1</issue><fpage>9</fpage><pub-id pub-id-type="doi">10.3978/j.issn.2305-5839.2015.12.38</pub-id><pub-id pub-id-type="medline">26855945</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Emmanuel</surname><given-names>T</given-names> </name><name name-style="western"><surname>Maupong</surname><given-names>T</given-names> </name><name name-style="western"><surname>Mpoeleng</surname><given-names>D</given-names> </name><name name-style="western"><surname>Semong</surname><given-names>T</given-names> </name><name name-style="western"><surname>Mphago</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tabona</surname><given-names>O</given-names> </name></person-group><article-title>A survey on missing data in machine learning</article-title><source>J Big Data</source><year>2021</year><volume>8</volume><issue>1</issue><fpage>140</fpage><pub-id pub-id-type="doi">10.1186/s40537-021-00516-9</pub-id><pub-id pub-id-type="medline">34722113</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>P</given-names> </name><name name-style="western"><surname>Stuart</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Allison</surname><given-names>DB</given-names> </name></person-group><article-title>Multiple imputation: a flexible tool for handling missing data</article-title><source>JAMA</source><year>2015</year><month>11</month><day>10</day><volume>314</volume><issue>18</issue><fpage>1966</fpage><lpage>1967</lpage><pub-id pub-id-type="doi">10.1001/jama.2015.15281</pub-id><pub-id pub-id-type="medline">26547468</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sisk</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sperrin</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Peek</surname><given-names>N</given-names> </name><name name-style="western"><surname>van Smeden</surname><given-names>M</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>GP</given-names> </name></person-group><article-title>Imputation and missing indicators for handling missing data in the development and deployment of clinical prediction models: a simulation study</article-title><source>Stat Methods Med Res</source><year>2023</year><month>08</month><volume>32</volume><issue>8</issue><fpage>1461</fpage><lpage>1477</lpage><pub-id pub-id-type="doi">10.1177/09622802231165001</pub-id><pub-id pub-id-type="medline">37105540</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Javanbakht</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ragsdale</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name><name name-style="western"><surname>Siminski</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gorbach</surname><given-names>P</given-names> </name></person-group><article-title>Comparing single and multiple imputation strategies for harmonizing substance use data across HIV-related cohort studies</article-title><source>BMC Med Res Methodol</source><year>2022</year><month>04</month><day>3</day><volume>22</volume><issue>1</issue><fpage>90</fpage><pub-id pub-id-type="doi">10.1186/s12874-022-01554-4</pub-id><pub-id pub-id-type="medline">35369872</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Groenwold</surname><given-names>RHH</given-names> 
</name><name name-style="western"><surname>White</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Donders</surname><given-names>ART</given-names> </name><name name-style="western"><surname>Carpenter</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name></person-group><article-title>Missing covariate data in clinical research: when and when not to use the missing-indicator method for analysis</article-title><source>CMAJ</source><year>2012</year><month>08</month><day>7</day><volume>184</volume><issue>11</issue><fpage>1265</fpage><lpage>1269</lpage><pub-id pub-id-type="doi">10.1503/cmaj.110977</pub-id><pub-id pub-id-type="medline">22371511</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Van Ness</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bosschieter</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Halpin-Gregorio</surname><given-names>R</given-names> </name><name name-style="western"><surname>Udell</surname><given-names>M</given-names> </name></person-group><article-title>The missing indicator method: from low to high dimensions</article-title><conf-name>KDD &#x2019;23: Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining</conf-name><conf-date>Aug 6-10, 2023</conf-date><conf-loc>Long Beach, CA</conf-loc><pub-id pub-id-type="doi">10.1145/3580305.3599911</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Smeden</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Reitsma</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Riley</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KG</given-names> </name></person-group><article-title>Clinical prediction models: diagnosis versus prognosis</article-title><source>J Clin Epidemiol</source><year>2021</year><month>04</month><volume>132</volume><fpage>142</fpage><lpage>145</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2021.01.009</pub-id><pub-id pub-id-type="medline">33775387</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wong</surname><given-names>A</given-names> </name><name name-style="western"><surname>Otles</surname><given-names>E</given-names> </name><name name-style="western"><surname>Donnelly</surname><given-names>JP</given-names> </name><etal/></person-group><article-title>External validation of a widely implemented proprietary sepsis prediction model in hospitalized patients</article-title><source>JAMA Intern Med</source><year>2021</year><month>08</month><day>1</day><volume>181</volume><issue>8</issue><fpage>1065</fpage><lpage>1070</lpage><pub-id pub-id-type="doi">10.1001/jamainternmed.2021.2626</pub-id><pub-id pub-id-type="medline">34152373</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sharafoddini</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dubin</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Maslove</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>J</given-names> 
</name></person-group><article-title>A new insight into missing data in intensive care unit patient profiles: observational study</article-title><source>JMIR Med Inform</source><year>2019</year><month>01</month><day>8</day><volume>7</volume><issue>1</issue><fpage>e11605</fpage><pub-id pub-id-type="doi">10.2196/11605</pub-id><pub-id pub-id-type="medline">30622091</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heymans</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Twisk</surname><given-names>JWR</given-names> </name></person-group><article-title>Handling missing data in clinical research</article-title><source>J Clin Epidemiol</source><year>2022</year><month>11</month><volume>151</volume><fpage>185</fpage><lpage>188</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2022.08.016</pub-id><pub-id pub-id-type="medline">36150546</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morris</surname><given-names>TP</given-names> </name><name name-style="western"><surname>White</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Crowther</surname><given-names>MJ</given-names> </name></person-group><article-title>Using simulation studies to evaluate statistical methods</article-title><source>Stat Med</source><year>2019</year><month>05</month><day>20</day><volume>38</volume><issue>11</issue><fpage>2074</fpage><lpage>2102</lpage><pub-id pub-id-type="doi">10.1002/sim.8086</pub-id><pub-id pub-id-type="medline">30652356</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Ehrig</surname><given-names>M</given-names> 
</name></person-group><article-title>Missing indicator paper</article-title><source>GitHub</source><year>2024</year><access-date>2025-03-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/mehrig1/Missing-Indicator-Paper">https://github.com/mehrig1/Missing-Indicator-Paper</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Leisch</surname><given-names>F</given-names> </name><name name-style="western"><surname>Weingessel</surname><given-names>A</given-names> </name><name name-style="western"><surname>Leisch</surname><given-names>MF</given-names> </name></person-group><article-title>Package bindata</article-title><source>The Comprehensive R Archive Network</source><year>2006</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/bindata/">https://cran.r-project.org/web/packages/bindata/</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Ripley</surname><given-names>B</given-names> </name><name name-style="western"><surname>Venables</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bates</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Hornik</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gebhardt</surname><given-names>A</given-names> </name><name name-style="western"><surname>Firth</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Package MASS</article-title><source>The Comprehensive R Archive Network</source><year>2013</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri"
xlink:href="https://cran.r-project.org/web/packages/MASS/index.html">https://cran.r-project.org/web/packages/MASS/index.html</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Wickham</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wickham</surname><given-names>MH</given-names> </name></person-group><article-title>Package tidyverse</article-title><source>The Comprehensive R Archive Network</source><year>2017</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/tidyverse/index.html">https://cran.r-project.org/web/packages/tidyverse/index.html</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Bates</surname><given-names>D</given-names> </name><name name-style="western"><surname>Maechler</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bolker</surname><given-names>B</given-names> </name><name name-style="western"><surname>Walker</surname><given-names>S</given-names> </name><name name-style="western"><surname>Christensen</surname><given-names>RHB</given-names> </name><name name-style="western"><surname>Singmann</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Package lme4</article-title><source>The Comprehensive R Archive Network</source><year>2015</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/lme4/index.html">https://cran.r-project.org/web/packages/lme4/index.html</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Kuznetsova</surname><given-names>A</given-names> </name><name name-style="western"><surname>Brockhoff</surname><given-names>PB</given-names> </name><name name-style="western"><surname>Christensen</surname><given-names>RHB</given-names> </name></person-group><article-title>LmerTest: tests in linear mixed effects models</article-title><source>J Stat Softw</source><year>2015</year><volume>82</volume><issue>13</issue><fpage>734</fpage><pub-id pub-id-type="doi">10.18637/jss.v082.i13</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Tierney</surname><given-names>NJ</given-names> </name><name name-style="western"><surname>Cook</surname><given-names>DH</given-names> </name></person-group><article-title>Expanding tidy data principles to facilitate missing data exploration, visualization and assessment of imputations</article-title><source>arXiv</source><comment>Preprint posted online on Sep 7, 2018</comment><pub-id pub-id-type="doi">10.48550/arXiv.1809.02264</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Buuren</surname><given-names>S</given-names> </name><name name-style="western"><surname>Groothuis-Oudshoorn</surname><given-names>K</given-names> </name><name name-style="western"><surname>Robitzsch</surname><given-names>A</given-names> </name><name name-style="western"><surname>Vink</surname><given-names>G</given-names> </name><name name-style="western"><surname>Doove</surname><given-names>L</given-names> </name><name name-style="western"><surname>Jolani</surname><given-names>S</given-names> </name></person-group><article-title>Package mice</article-title><source>The Comprehensive R Archive Network</source><year>2015</year><access-date>2024-07-01</access-date><comment><ext-link
ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/mice/index.html">https://cran.r-project.org/web/packages/mice/index.html</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Bolker</surname><given-names>B</given-names> </name><name name-style="western"><surname>Robinson</surname><given-names>D</given-names> </name><name name-style="western"><surname>Menne</surname><given-names>D</given-names> </name><name name-style="western"><surname>Gabry</surname><given-names>J</given-names> </name><name name-style="western"><surname>Buerkner</surname><given-names>P</given-names> </name></person-group><article-title>Package broom.mixed</article-title><source>The Comprehensive R Archive Network</source><year>2019</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/broom.mixed/index.html">https://cran.r-project.org/web/packages/broom.mixed/index.html</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Robin</surname><given-names>X</given-names> </name><name name-style="western"><surname>Turck</surname><given-names>N</given-names> </name><name name-style="western"><surname>Hainard</surname><given-names>A</given-names> </name><etal/></person-group><article-title>pROC: an open-source package for R and S+ to analyze and compare ROC curves</article-title><source>BMC Bioinformatics</source><year>2011</year><month>03</month><day>17</day><volume>12</volume><fpage>1</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1186/1471-2105-12-77</pub-id><pub-id pub-id-type="medline">21414208</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><person-group 
person-group-type="author"><name name-style="western"><surname>Signorell</surname><given-names>A</given-names> </name><name name-style="western"><surname>Aho</surname><given-names>K</given-names> </name><name name-style="western"><surname>Alfons</surname><given-names>A</given-names> </name><name name-style="western"><surname>Anderegg</surname><given-names>N</given-names> </name><name name-style="western"><surname>Aragon</surname><given-names>T</given-names> </name><name name-style="western"><surname>Arachchige</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Package DescTools</article-title><source>The Comprehensive R Archive Network</source><year>2021</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/DescTools/index.html">https://cran.r-project.org/web/packages/DescTools/index.html</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Stekhoven</surname><given-names>DJ</given-names> </name></person-group><article-title>Package missForest</article-title><source>The Comprehensive R Archive Network</source><year>2013</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/missForest/index.html">https://cran.r-project.org/web/packages/missForest/index.html</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arel-Bundock</surname><given-names>V</given-names> </name></person-group><article-title>Data and model summaries in R</article-title><source>J Stat Softw</source><year>2022</year><volume>103</volume><issue>1</issue><fpage>1</fpage><lpage>23</lpage><pub-id
pub-id-type="doi">10.18637/jss.v103.i01</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>L&#x00FC;decke</surname><given-names>D</given-names> </name><name name-style="western"><surname>L&#x00FC;decke</surname><given-names>MD</given-names> </name></person-group><article-title>Package sjPlot</article-title><source>The Comprehensive R Archive Network</source><year>2015</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/sjPlot/index.html">https://cran.r-project.org/web/packages/sjPlot/index.html</ext-link></comment></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Auguie</surname><given-names>B</given-names> </name><name name-style="western"><surname>Antonov</surname><given-names>A</given-names> </name><name name-style="western"><surname>Auguie</surname><given-names>MB</given-names> </name></person-group><article-title>Package gridExtra</article-title><source>The Comprehensive R Archive Network</source><year>2017</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/gridExtra/index.html">https://cran.r-project.org/web/packages/gridExtra/index.html</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhou</surname><given-names>L</given-names> </name><name name-style="western"><surname>Braun</surname><given-names>WJ</given-names> </name></person-group><article-title>Fun with the R Grid Package</article-title><source>J Stat Educ</source><year>2010</year><month>11</month><volume>18</volume><issue>3</issue><fpage>1</fpage><lpage>35</lpage><pub-id
pub-id-type="doi">10.1080/10691898.2010.11889587</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Fox</surname><given-names>J</given-names> </name><name name-style="western"><surname>Weisberg</surname><given-names>S</given-names> </name><name name-style="western"><surname>Adler</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bates</surname><given-names>D</given-names> </name><name name-style="western"><surname>Baud-Bovy</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ellison</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Package car</article-title><source>The Comprehensive R Archive Network</source><year>2012</year><access-date>2024-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/car/index.html">https://cran.r-project.org/web/packages/car/index.html</ext-link></comment></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hsieh</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Speiser</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Neiberg</surname><given-names>RH</given-names> </name><name name-style="western"><surname>Marsh</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Tooze</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Houston</surname><given-names>DK</given-names> </name></person-group><article-title>Factors associated with falls in older adults: a secondary analysis of a 12-month randomized controlled trial</article-title><source>Arch Gerontol 
Geriatr</source><year>2023</year><month>05</month><volume>108</volume><fpage>104940</fpage><pub-id pub-id-type="doi">10.1016/j.archger.2023.104940</pub-id><pub-id pub-id-type="medline">36709562</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stekhoven</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>B&#x00FC;hlmann</surname><given-names>P</given-names> </name></person-group><article-title>MissForest&#x2014;non-parametric missing value imputation for mixed-type data</article-title><source>Bioinformatics</source><year>2012</year><month>01</month><day>1</day><volume>28</volume><issue>1</issue><fpage>112</fpage><lpage>118</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btr597</pub-id><pub-id pub-id-type="medline">22039212</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sperrin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>GP</given-names> </name></person-group><article-title>Multiple imputation with missing indicators as proxies for unmeasured variables: simulation study</article-title><source>BMC Med Res Methodol</source><year>2020</year><month>07</month><day>8</day><volume>20</volume><issue>1</issue><fpage>185</fpage><pub-id pub-id-type="doi">10.1186/s12874-020-01068-x</pub-id><pub-id pub-id-type="medline">32640992</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Data simulation for one run of the simulation study.</p><media xlink:href="medinform_v13i1e64354_app1.docx" xlink:title="DOCX File, 35 KB"/></supplementary-material></app-group></back></article>