<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e79307</article-id><article-id pub-id-type="doi">10.2196/79307</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Methods for Addressing Missingness in Electronic Health Record Data for Clinical Prediction Models: Comparative Evaluation</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Digitale</surname><given-names>Jean</given-names></name><degrees>PhD, MPH, BSN</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Franzon</surname><given-names>Deborah</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Pletcher</surname><given-names>Mark J</given-names></name><degrees>MD, MPH</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>McCulloch</surname><given-names>Charles E</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Gennatas</surname><given-names>Efstathios D</given-names></name><degrees>MBBS, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>National Clinician Scholars Program, University of California, San Francisco</institution><addr-line>San Francisco</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Epidemiology and Biostatistics, University of California, San Francisco</institution><addr-line>550 16th St, 2nd Floor</addr-line><addr-line>San Francisco</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Pediatrics, Benioff Children's Hospital, University of California, San Francisco</institution><addr-line>San Francisco</addr-line><addr-line>CA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Speiser</surname><given-names>Jaime</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Kern</surname><given-names>Zoltan</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Jean Digitale, PhD, MPH, BSN, Department of Epidemiology and Biostatistics, University of California, San Francisco, 550 16th St, 2nd Floor, San Francisco, CA, 94158, United States, 1 (415) 476-2300; <email>jean.digitale@ucsf.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date 
pub-type="epub"><day>14</day><month>11</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e79307</elocation-id><history><date date-type="received"><day>18</day><month>06</month><year>2025</year></date><date date-type="rev-recd"><day>25</day><month>09</month><year>2025</year></date><date date-type="accepted"><day>13</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Jean Digitale, Deborah Franzon, Mark J Pletcher, Charles E McCulloch, Efstathios D Gennatas. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 14.11.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e79307"/><abstract><sec><title>Background</title><p>Missing data are a common challenge in electronic health record (EHR)&#x2013;based prediction modeling. 
Traditional imputation methods may not suit prediction or machine learning models, and real-world use requires workflows that are implementable for both model development and real-time prediction.</p></sec><sec><title>Objective</title><p>We evaluated methods for handling missing data when using EHR data to build clinical prediction models for patients admitted to the pediatric intensive care unit (PICU).</p></sec><sec sec-type="methods"><title>Methods</title><p>Using EHR data containing missing values from an academic medical center PICU, we generated a synthetic complete dataset. From this, we created 300 datasets with missing data under varying mechanisms and proportions of missingness for the outcomes of (1) successful extubation (binary) and (2) blood pressure (continuous). We assessed strategies to address missing data including simple methods (eg, last observation carried forward [LOCF]), complex methods (eg, random forest multiple imputation), and native support for missing values in outcome prediction models.</p></sec><sec sec-type="results"><title>Results</title><p>Across 886 patients and 1220 intubation events, 18.2% of original data were missing. LOCF had the lowest imputation error, followed by random forest imputation (average mean squared error [MSE] improvement over mean imputation: 0.41 [range: 0.30, 0.50] and 0.33 [0.21, 0.43], respectively). LOCF generally outperformed other imputation methods across outcome metrics and models (mean improvement: 1.28% [range: &#x2212;0.07%, 7.2%]). Imputation methods showed more performance variability for the binary outcome (balanced accuracy coefficient of variation: 0.042) than the continuous outcome (mean squared error coefficient of variation: 0.001).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Traditional imputation methods for inferential statistics, such as multiple imputation, may not be optimal for prediction models. 
The amount of missingness influenced performance more than the missingness mechanism. In datasets with frequent measurements, LOCF and native support for missing values in machine learning models offer reasonable performance for handling missingness at minimal computational cost in predictive analyses.</p></sec></abstract><kwd-group><kwd>clinical prediction models</kwd><kwd>imputation</kwd><kwd>machine learning</kwd><kwd>missing data</kwd><kwd>electronic health record</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background and Significance</title><p>Addressing missing data is necessary for developing a clinical prediction model. Electronic health record (EHR) data are a rich data source but present particular challenges. Missing data may result from lack of documentation or measurement [<xref ref-type="bibr" rid="ref1">1</xref>]. EHR data are generated via clinical care, with values measured at irregular intervals.</p><p>Raw EHR data are often transformed into an analytic dataset by binning variables by time. Missing data arise if a variable is not measured within a time window [<xref ref-type="bibr" rid="ref2">2</xref>]. Measurement frequency (and resultant missingness) is often linked to how abnormal the value is or is expected to be, such that missingness itself may be informative. Given many algorithms require complete data, a principled approach to address missingness is required.</p><p>Techniques for handling missing data for inferential models that seek to describe or causally explain are well established. 
Missingness is traditionally categorized into three mechanisms [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]:</p><list list-type="order"><list-item><p>Missing completely at random (MCAR)&#x2014;probability of missingness does not depend on variables in the dataset or depends only on observed values of covariates included in the model; for example, a laboratory technician forgets to record results for a patient, unrelated to any characteristics of that patient or their health [<xref ref-type="bibr" rid="ref5">5</xref>].</p></list-item><list-item><p>Missing at random (MAR)&#x2014;probability of missingness depends on observed values in the data, including the outcome; for example, height is not recorded for a patient but is related to weight and sex of the patient, which are present in the EHR.</p></list-item><list-item><p>Missing not at random (MNAR)&#x2014;probability of missingness depends on unobserved values; for example, no lactate is measured on a patient because the clinician expects it to be normal.</p></list-item></list><p>Bias from MNAR can be intractable for inferential models [<xref ref-type="bibr" rid="ref4">4</xref>]. Given EHR data are likely MNAR, this could be problematic if also true for clinical prediction models. For inferential models, simple strategies, such as complete case analysis, mean imputation, and last observation carried forward, are known to produce biased results [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Preferred strategies, such as multiple imputation, incorporate uncertainty into imputed values, thereby accurately characterizing uncertainty in parameter estimates.</p><p>Literature on handling missing data in prediction modeling is less developed. 
Unlike inferential models, which focus on bias and precision in parameter estimates, prediction models prioritize improving predictive accuracy and interpretability [<xref ref-type="bibr" rid="ref4">4</xref>]. Classic statistical imputation methods may be complex to implement for prediction models [<xref ref-type="bibr" rid="ref8">8</xref>] or less relevant, particularly as medicine advances toward ever more complex machine learning algorithms [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. There is little guidance on best practices to address missing data for clinical prediction models [<xref ref-type="bibr" rid="ref11">11</xref>]. Methods for handling missing data are rarely reported, and complete case analysis is the most common approach [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. This may not only result in bias but also risk significant loss of data in high-dimensional EHR datasets [<xref ref-type="bibr" rid="ref9">9</xref>]. Machine learning is increasingly being used to address missing data, both as imputation models (eg, random forests) [<xref ref-type="bibr" rid="ref13">13</xref>] and by natively handling missing data in prediction models themselves, bypassing the need for imputation altogether. Tree-based methods [<xref ref-type="bibr" rid="ref14">14</xref>] are particularly suited for this task [<xref ref-type="bibr" rid="ref9">9</xref>]. Yet, few studies have compared classic imputation methods with such built-in strategies in EHR data [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>Real-world application of clinical prediction models presents additional challenges. 
Many risk models currently in practice require complete data or use imputation methods that may be overly simplistic, limiting their usefulness [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Implementing models prospectively requires data workflows that can be applied in the same way to both retrospective data to build the model and new data for real-time prediction for individuals [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. There are no established techniques for managing missing data post-model development. Studies assessing methods for handling missing data in prospective applications on individual patients often used datasets containing only a few predictors [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. These findings may not translate to datasets with more variables because some imputation methods may struggle to handle large numbers of correlated features and binary variables are more likely to be perfectly predicted, leading to overfitting. Furthermore, outcome prediction models after addressing missing data were often standard statistical methods such as logistic regression [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref17">17</xref>] or Cox proportional hazard models [<xref ref-type="bibr" rid="ref16">16</xref>], when different methods for addressing missingness may be preferable for predictive machine learning models.</p></sec><sec id="s1-2"><title>Objective</title><p>We used EHR data from a live use case (predicting extubation readiness of children in the pediatric intensive care unit [PICU]) to generate a synthetic complete dataset to evaluate multiple methods for imputation and their effects on predictive performance. We included methods that learn from training data and apply to new data. 
As the relative performance of methods varies by type of missingness (MAR, MCAR, and MNAR) and proportion of missing values [<xref ref-type="bibr" rid="ref18">18</xref>], we varied both in our assessment.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Population</title><p>The study population was patients aged &#x003E;30 days and &#x003C;18 years old from the PICU at the University of California, San Francisco (UCSF) Benioff Children&#x2019;s Hospital intubated between January 1, 2013, and March 31, 2023. Patient encounters were eligible for the sample if the child was intubated for more than 24 hours. We excluded patients intubated for less than 24 hours as they were likely intubated for surgeries, procedures, or other indications and extubated quickly without complications. Children with repeated intubations were eligible for inclusion for each intubation event.</p></sec><sec id="s2-2"><title>Data</title><p>We used EHR data extracted from the UCSF Clinical Data Warehouse (updated daily from the real-time EHR). Based on expert opinion, peer-reviewed literature, and group consensus of the UCSF Pediatric Critical Care Research Group, we selected a broad range of clinical, physiologic, and laboratory variables in the EHR that could be relevant to extubation readiness including: vital signs, ventilator settings, laboratory values, medications, neurological status, fluid balance, and other patient characteristics (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Raw data were collapsed into 4-hour time windows [<xref ref-type="bibr" rid="ref1">1</xref>] containing the mean of each numeric or binary variable and mode of each multi-level categorical variable, resulting in 99 variables. 
Collapsing variables into time windows made imputation more tractable as it increased the probability of at least 1 nonmissing value per time window and more computationally feasible by decreasing the number of observations on which we needed to impute. In practice, nursing assessments are completed at least every 4 hours (with some assessments in the ICU performed hourly); thus, the 4-hour interval is clinically meaningful. The first time window included in the model ended 12 hours after intubation (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Example patient timeline and resultant data. The example patient timeline depicts 4-hour time windows beginning at intubation and ending at extubation. The table in the figure demonstrates the data resulting from this patient that would be included in the model. This patient was intubated at 17:27 on 5/6/2018 and extubated at 08:45 on 5/8/2018. The first time window included as an observation in the model ended 12 hours after intubation at 05:27 on 5/7/2018. Time-varying data from the prior 2 time windows were included for each observation as lagged variables to capture the trajectory of the patient. The final time window included in the model for this patient is from 17:27 to 21:27 on 5/7/2018. The extubation outcome for each time window was the status 12 hours (or 3 time windows) later. The blood pressure outcome for each time window was the value 4 hours (or 1 time window) later.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e79307_fig01.png"/></fig><p>The binary outcome of successful extubation was defined as extubation that did not result in reintubation within 48 hours. 
The extubation outcome assigned to each time window indicated status 12 hours after the end of the time window (<xref ref-type="fig" rid="figure1">Figure 1</xref>; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), as we aimed to predict successful extubation prior to clinician actions indicating they already decided to extubate a patient. A secondary outcome of age-adjusted systolic blood pressure percentile [<xref ref-type="bibr" rid="ref19">19</xref>] was added to ascertain whether findings were similar for a continuous outcome. The blood pressure outcome assigned to each time window was the value 4 hours after the end of the time window.</p><p>To generate a synthetic dataset with no missing values (<xref ref-type="fig" rid="figure2">Figure 2</xref>), we filled in missing numeric values with linear interpolation between last value observed and next value observed [<xref ref-type="bibr" rid="ref20">20</xref>]. We filled in all remaining missing values with the nearest nonmissing value. For never observed variables (1.4% of cells), we made reasonable assumptions (eg, we used the standard endotracheal tube [ETT] size formula [age in years/4+4] for pediatrics to fill in missing ETT sizes [<xref ref-type="bibr" rid="ref21">21</xref>]) and then filled in remaining missingness using the <italic>missRanger</italic> package [<xref ref-type="bibr" rid="ref22">22</xref>], which implements random forest imputation with predictive mean matching and is optimized for speed and memory efficiency. Analyses were conducted in R version 4.3.2 [<xref ref-type="bibr" rid="ref23">23</xref>].</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Experimental methods flowchart. 
EHR: electronic health record.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e79307_fig02.png"/></fig></sec><sec id="s2-3"><title>Missingness Mechanisms</title><p>We induced missingness in the complete, synthetic dataset to simulate 5 missing data mechanisms:</p><list list-type="order"><list-item><p>MCAR</p></list-item><list-item><p>MAR</p></list-item><list-item><p>weak MNAR (twice as much weight given to observed compared with missing variables to determine missingness)</p></list-item><list-item><p>moderate MNAR (equal weight given to observed compared with missing variables to determine missingness)</p></list-item><list-item><p>strong MNAR (missingness solely based on missing variables)</p></list-item></list><p>For each, we created 3 scenarios varying the amount of missingness: approximately 0.5x, 1x, and 2x the percentage of missing cells in the original data, generating 15 total missingness scenarios. We divided variables into 5 groups (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) to create patterns of missing data. The outcome was allowed to influence missingness in MAR and MNAR scenarios (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Using the ampute command in the <italic>mice</italic> package [<xref ref-type="bibr" rid="ref24">24</xref>], we induced missingness in 20 unique datasets (enough to have a distribution of results, but not exceed our computing power) for each of the 15 scenarios, resulting in 300 datasets per outcome. All datasets for both outcomes contained the same predictor variables.</p></sec><sec id="s2-4"><title>Imputation Methods</title><p>To incorporate temporal patterns in the predictors, we added values of time-varying variables from the prior 2 lagged time windows to each row for a total of 265 features. 
These data were structured in wide format, such that each row included the current value and its 2 lags as separate variables. This allowed the imputation models to use all 3 time points to inform each other&#x2019;s missing values. For simplicity, we treated these repeated measurements as distinct variables in imputation models (rather than using a multilevel model). While some studies suggest this approach yields comparable performance [<xref ref-type="bibr" rid="ref25">25</xref>], others have reported advantages of multilevel models [<xref ref-type="bibr" rid="ref26">26</xref>]. Data were split into training (75% of intubations) and test (25% of intubations) sets ordered by date, with earlier intubations included in training and later ones in test. We ensured each patient was only in either the training or test set.</p><p>To simulate a real-time prediction workflow, imputation models were built using only training data and excluded the outcome [<xref ref-type="bibr" rid="ref15">15</xref>]. In total, 6 methods to handle missingness were applied to each amputed dataset (<xref ref-type="table" rid="table1">Table 1</xref>, <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>):</p><list list-type="order"><list-item><p>Mean: Mean imputation is frequently used in practice [<xref ref-type="bibr" rid="ref27">27</xref>]. We imputed test set values using the unconditional mean (numeric and binary) or mode (factor) in the training set.</p></list-item><list-item><p>Last observation carried forward (LOCF): This simple approach is recommended for imputation in time series data where data are available only before the missing value [<xref ref-type="bibr" rid="ref20">20</xref>] (true in prospective implementation of clinical decision support). We allowed values to be carried forward indefinitely across time windows.</p></list-item><list-item><p>Random forest: Traditional statistical imputation relies on parametric assumptions. 
However, nonparametric methods, such as this, have been shown to outperform established methods (especially in settings with complex interactions and nonlinear relationships) [<xref ref-type="bibr" rid="ref28">28</xref>]. <italic>Mice</italic> [<xref ref-type="bibr" rid="ref29">29</xref>] imputes missing values by building a random forest for each variable, identifying observations in the same terminal node, and sampling a donor value from one of these observations.</p></list-item><list-item><p>Bayesian imputation under the normal linear model with predictive mean matching (Bayesian/PMM): PMM is a hot deck method where missing values are imputed from cases with observed values matched according to predictions of the imputation model (here, Bayesian imputation under the normal linear model in <italic>mice</italic> [<xref ref-type="bibr" rid="ref29">29</xref>]). PMM is robust against model misspecification and ensures imputed values are constrained to the range of observed data [<xref ref-type="bibr" rid="ref7">7</xref>].</p></list-item><list-item><p>Least absolute shrinkage and selection operator (LASSO): Regularized models, such as LASSO, are beneficial to handle multicollinearity and prevent overfitting in high-dimensional data. We used <italic>mice</italic> [<xref ref-type="bibr" rid="ref29">29</xref>] to fit LASSO-penalized regression models on bootstrap samples of observed data and drew imputed values from the resulting distributions.</p></list-item><list-item><p>Native support for missing data in prediction model (no imputation required): Some machine learning algorithms can handle missing values directly, without dropping cases or requiring separate imputation. We used gradient boosted trees for our primary prediction model [<xref ref-type="bibr" rid="ref30">30</xref>]. 
The LightGBM package [<xref ref-type="bibr" rid="ref31">31</xref>] allocates missing values to the bins that optimally minimize loss.</p></list-item></list><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Methods for handling missing data.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Method</td><td align="left" valign="bottom">Assumptions</td><td align="left" valign="bottom">Computational complexity</td><td align="left" valign="bottom">Limitations</td><td align="left" valign="bottom">Benefits</td></tr></thead><tbody><tr><td align="left" valign="top">Mean imputation</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Assumes missing values are similar to the mean</p></list-item></list></td><td align="left" valign="top">Negligible</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Artificially reduces variance</p></list-item><list-item><p>Disturbs relationships between variables</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Simple to implement</p></list-item><list-item><p>Constant imputation creates patterns that machine learning can exploit</p></list-item></list></td></tr><tr><td align="left" valign="top">Last observation carried forward</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Assumes stability over time</p></list-item></list></td><td align="left" valign="top">Negligible</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Data remain missing if no prior measurement exists</p></list-item><list-item><p>May not reflect true patient progression</p></list-item><list-item><p>Can introduce bias if trends are not stable over time</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Simple to implement</p></list-item><list-item><p>Reflects how clinicians practice for many variables (assume no changes or 
re-measure if important)</p></list-item></list></td></tr><tr><td align="left" valign="top">Random forest</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>MCAR<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> or MAR<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></p></list-item></list></td><td align="left" valign="top">High computational cost</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Requires significant computational resources</p></list-item><list-item><p>May not perform well in small datasets</p></list-item><list-item><p>Dearth of packages that allow models to impute on new data</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Nonparametric method handles complex interactions and nonlinear relationships well</p></list-item><list-item><p>Works with mixed data types (categorical and continuous)</p></list-item></list></td></tr><tr><td align="left" valign="top">Bayesian imputation under the normal linear model with predictive mean matching (Bayesian/PMM)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>MCAR or MAR</p></list-item><list-item><p>Assumes normality for underlying distribution</p></list-item></list></td><td align="left" valign="top">High computational cost</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Works best with large samples</p></list-item><list-item><p>Dearth of packages that allow models to impute on new data</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Robust against model misspecification</p></list-item><list-item><p>Ensures imputed values are within the range of observed data</p></list-item></list></td></tr><tr><td align="left" valign="top">Least absolute shrinkage and selection operator (LASSO)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>MCAR or MAR</p></list-item></list></td><td align="left" 
valign="top">High computational cost</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>May not capture nonlinear relationships well</p></list-item><list-item><p>Dearth of packages that allow models to impute on new data</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Handles multicollinearity well</p></list-item><list-item><p>Prevents overfitting in high-dimensional data</p></list-item></list></td></tr><tr><td align="left" valign="top">Native support for missing data in prediction models</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Varies by model</p></list-item><list-item><p>Methods [<xref ref-type="bibr" rid="ref12">12</xref>] include surrogate splits and allocating missing values to bins that optimally minimize loss</p></list-item></list></td><td align="left" valign="top">None for imputation</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>This capability is not available in all machine learning algorithms</p></list-item><list-item><p>Performance depends on algorithm&#x2019;s internal handling of missing values</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>No need for explicit imputation</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>MCAR: missing completely at random.</p></fn><fn id="table1fn2"><p><sup>b</sup>MAR: missing at random.</p></fn></table-wrap-foot></table-wrap><p>We used available software for imputation to avoid the need to develop custom software and enable our findings to be more readily applicable to practitioners. We did not include deep-learning methods in our experiment as they were impractical with our relatively small sample size. We generated 30 imputations per dataset using the <italic>mice</italic> package [<xref ref-type="bibr" rid="ref29">29</xref>] for methods 3&#x2010;5. 
Models built with the training data were then used to impute on the test set. <italic>Mice</italic> purposefully incorporates uncertainty into imputations because doing so is advantageous for inferential analysis. We therefore tested the imputations 2 ways. First, we averaged the imputations to get a more stable estimate of each missing value to use in a single outcome model. Second, we implemented multiple imputation by estimating 30 outcome models and averaging the predicted probabilities for a final prediction.</p></sec><sec id="s2-5"><title>Prediction Model</title><p>For the outcome prediction model, we used gradient boosted trees [<xref ref-type="bibr" rid="ref30">30</xref>]. It is one of the best-performing algorithms in structured data in general and within biomedical datasets [<xref ref-type="bibr" rid="ref32">32</xref>] and uses all cases in training data, even if they are incomplete. To assess whether imputation method performance was consistent across outcome models, we also compared a LASSO outcome model. However, linear models like LASSO cannot accommodate missing values, so this was only performed on imputed datasets. Outcome models were implemented using the <italic>rtemis</italic> package [<xref ref-type="bibr" rid="ref33">33</xref>] with <italic>LightGBM</italic> [<xref ref-type="bibr" rid="ref34">34</xref>] and <italic>glmnet</italic> [<xref ref-type="bibr" rid="ref35">35</xref>]. We used 5-fold cross-validation in the training set to tune hyperparameters and inverse frequency weighting to upweight the minority class given the data were unbalanced.</p></sec><sec id="s2-6"><title>Analysis of Imputation Accuracy</title><p>It is established that in imputation for statistical inference, focusing on improving accuracy of the imputations at the cost of correctly incorporating true uncertainty leads to biased and invalid results [<xref ref-type="bibr" rid="ref7">7</xref>]. However, the relationship between imputation accuracy and prediction model performance is less well-studied. 
We compared the accuracy of imputations by calculating mean squared error (MSE) for numeric variables and classification error for categorical variables in each dataset and creating box plots. Before calculating MSE, we standardized the variables by dividing by their standard deviations in the complete dataset. To calculate MSE for random forest, Bayesian/PMM, and LASSO, we compared the average of the 30 imputations to the true value. We also assessed whether temporal autocorrelation of each variable was associated with imputation performance across all methods descriptively using scatterplots and quantified using correlation coefficients (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). In addition, we conducted a sensitivity analysis stratifying imputation error by whether a variable&#x2019;s values had been missing in the original data (and filled in to create the synthetic complete dataset) versus not originally missing, to evaluate whether this initial filling step influenced the apparent performance of the imputation methods.</p><p>To assess the accuracy of imputation for different categories of variables, we built linear models for MSE and classification error. Each observation&#x2019;s outcome was the error value for a given variable in a given dataset. Each model included imputation type, missingness type, proportion missing data (0.5x, 1x, and 2x original), and variable group (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
We included all 3-way and 2-way interactions and completed a backward stepwise elimination procedure based on <italic>P</italic> values (included <italic>P</italic>&#x003C;.05) to determine the final models.</p></sec><sec id="s2-7"><title>Analysis of Outcome Model Performance</title><p>We assessed the performance of outcome prediction models for extubation with 2 primary metrics: balanced accuracy [<xref ref-type="bibr" rid="ref36">36</xref>] and area under the receiver operating characteristic curve (AUC) [<xref ref-type="bibr" rid="ref37">37</xref>]. We present secondary results for sensitivity, specificity, positive predictive value, negative predictive value, and F1 [<xref ref-type="bibr" rid="ref38">38</xref>]. We assessed the performance of the outcome prediction models for blood pressure with the primary metric of MSE. Secondary results are presented for mean absolute error, root MSE, and <italic>R</italic><sup>2</sup> [<xref ref-type="bibr" rid="ref39">39</xref>]. We compared these graphically to the performance of a model built with the complete data and calculated the coefficient of variation (CV) to assess variability.</p></sec><sec id="s2-8"><title>Ethical Considerations</title><p>We received ethical approval from the University of California, San Francisco Institutional Review Board (study #17&#x2010;23751), which granted a waiver of informed consent. No financial incentives were provided to patients. Patient privacy and confidentiality were protected through secure data storage, restricted access to authorized study personnel, and compliance with institutional and regulatory requirements.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Data</title><p>The data contained 886 patients and 1220 intubations, 929 (76.1%) of which ended in successful extubation (<xref ref-type="table" rid="table2">Table 2</xref>). 
The median duration of intubation in the PICU was 4.4 (IQR 2.2-8.5) days, leading to 50,187 four-hour time windows in the analytic dataset.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Description of sample<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">Value</td></tr></thead><tbody><tr><td align="left" valign="top">Patients</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total, n</td><td align="left" valign="top">886</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female patients, n (%)</td><td align="left" valign="top">405 (45.7)</td></tr><tr><td align="left" valign="top">Race or ethnicity, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Asian</td><td align="left" valign="top">117 (13.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Black</td><td align="left" valign="top">67 (8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Latinx</td><td align="left" valign="top">340 (40.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">88 (10.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>White</td><td align="left" valign="top">229 (27.2)</td></tr><tr><td align="left" valign="top">Intubations</td><td align="left" 
valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total, n</td><td align="left" valign="top">1220</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age at intubation (years), median (IQR)</td><td align="left" valign="top">4.3 (1-12.1)</td></tr><tr><td align="left" valign="top" colspan="2">Outcome, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Extubation success</td><td align="left" valign="top">929 (76.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Extubation failure</td><td align="left" valign="top">100 (8.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Death</td><td align="left" valign="top">96 (7.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Tracheostomy</td><td align="left" valign="top">36 (3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transfer to another unit</td><td align="left" valign="top">25 (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ETT<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> change</td><td align="left" valign="top">34 (2.8)</td></tr><tr><td align="left" valign="top">Duration of intubation (days), median (IQR)</td><td align="left" valign="top">4.4 (2.2-8.5)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Patients intubated multiple times during the study period may have multiple intubation events included in the sample. 
The binary outcome of successful extubation for the model collapsed all other outcome categories. Here, extubation failure is defined as reintubation within 48 hours, death is death before or within 48 hours of extubation, and ETT change is an extubation that was immediately and purposefully replaced by another ETT (eg, to change the size).</p></fn><fn id="table2fn2"><p><sup>b</sup>ETT: endotracheal tube.</p></fn></table-wrap-foot></table-wrap><p>Missingness for each variable in the original data (collapsed into 4 h time windows) varied from 0% (0/56,287; eg, sex, age) to 76.5% (43,077/56,287; white blood cells) (<xref ref-type="table" rid="table3">Table 3</xref>; Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Overall, 18.2% of cells (1,012,668/5,561,767) were missing (Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). After simulating missingness, datasets approximating 0.5x, 1x, and 2x missingness of the original data averaged 9.6%, 18.1%, and 35.9% missing cells, respectively.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Missingness in original data<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">Number of 4-hour time windows missing variable</td><td align="left" valign="bottom">Percent of 4-hour time windows missing variable</td></tr></thead><tbody><tr><td align="left" valign="top">Age at time of intubation (in days)</td><td align="left" valign="top">0</td><td align="left" valign="top">0.0</td></tr><tr><td align="left" valign="top">Medication: total oral morphine equivalents (mg kg<sup>&#x2013;1</sup>)</td><td align="left" valign="top">0</td><td align="left" valign="top">0.0</td></tr><tr><td align="left" valign="top">Sex</td><td align="left" valign="top">0</td><td align="left" 
valign="top">0.0</td></tr><tr><td align="left" valign="top">Intake or output total milliliter over the prior 12 hours kg<sup>&#x2013;1</sup></td><td align="left" valign="top">167</td><td align="left" valign="top">0.3</td></tr><tr><td align="left" valign="top">Pulse</td><td align="left" valign="top">420</td><td align="left" valign="top">0.7</td></tr><tr><td align="left" valign="top">Respiratory rate (recorded in vital signs)</td><td align="left" valign="top">1179</td><td align="left" valign="top">2.1</td></tr><tr><td align="left" valign="top">ETT<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> size</td><td align="left" valign="top">6179</td><td align="left" valign="top">11.0</td></tr><tr><td align="left" valign="top">Respiratory pattern: tachypneic</td><td align="left" valign="top">7290</td><td align="left" valign="top">13.0</td></tr><tr><td align="left" valign="top">PEEP<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">8975</td><td align="left" valign="top">15.9</td></tr><tr><td align="left" valign="top">Exhaled tidal volume kg<sup>&#x2013;1</sup></td><td align="left" valign="top">10,274</td><td align="left" valign="top">18.3</td></tr><tr><td align="left" valign="top">State behavioral scale</td><td align="left" valign="top">15,888</td><td align="left" valign="top">28.2</td></tr><tr><td align="left" valign="top">Secretion amount (categories: none, scant, small, moderate, large, copious)</td><td align="left" valign="top">16,325</td><td align="left" valign="top">29.0</td></tr><tr><td align="left" valign="top">Glasgow coma scale score</td><td align="left" valign="top">25,751</td><td align="left" valign="top">45.7</td></tr><tr><td align="left" valign="top">pH</td><td align="left" valign="top">25,804</td><td align="left" valign="top">45.8</td></tr><tr><td align="left" valign="top">Upper extremity motor response</td><td align="left" valign="top">28,839</td><td align="left" valign="top">51.2</td></tr><tr><td 
align="left" valign="top">Cough: present</td><td align="left" valign="top">35,254</td><td align="left" valign="top">62.6</td></tr><tr><td align="left" valign="top">White blood cell count</td><td align="left" valign="top">43,077</td><td align="left" valign="top">76.5</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Example variables display the range of proportion of missingness in the original data (N=56,287 4-h time windows; these data include more time windows than the final analytic dataset because they were compiled before excluding time windows that were constructed solely for creating lagged variables). Detailed data for all variables are available in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></fn><fn id="table3fn2"><p><sup>b</sup>ETT: endotracheal tube.</p></fn><fn id="table3fn3"><p><sup>c</sup>PEEP: positive end-expiratory pressure.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Imputation Performance</title><p>Across 300 datasets with induced missingness per outcome, the same 176 numeric and 6 categorical variables were imputed. <xref ref-type="fig" rid="figure3">Figure 3</xref> presents MSE for datasets for the outcome of extubation.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Imputation test performance metrics (extubation): mean squared error. Each point represents mean squared error calculated for 176 numeric variables for 1 of 300 datasets created for the outcome of extubation. There are 20 datasets per missingness scenario and imputation type represented in each box plot. 
AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAR: missing at random; MCAR: missing completely at random; MNAR: missing not at random.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e79307_fig03.png"/></fig><p>The results for the outcome of blood pressure (which imputed the same variables) were virtually identical (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). Performance in the test sets showed that LOCF had the lowest MSE on average in all missingness scenarios for numeric variables (average improvement of MSE compared with mean imputation was 0.41 for the outcome of extubation [range: 0.30, 0.50]). Random forest imputation was consistently second best (0.33 [0.21, 0.43]), followed by LASSO (0.26 [0.15, 0.35]), Bayesian/PMM (0.22 [0.07, 0.34]), and finally, mean imputation (Reference). Performance overall degraded as the proportion of missing data increased, with proportion missing having a greater effect than missingness mechanism. Classification error displayed similar patterns overall for categorical variables (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). While LOCF and mean imputation did not generally overfit in the training data compared with the test set, all model-based imputation methods overfit the training data, with random forest doing so the least (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendices 4</xref> and <xref ref-type="supplementary-material" rid="app5">5</xref>). 
Temporal autocorrelation was negatively associated with imputation error for all methods except mean imputation, which showed little association (<italic>r</italic>=&#x2013;0.13; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>). The association was strongest for LOCF (<italic>r</italic>=&#x2013;0.92), followed by random forest (<italic>r</italic>=&#x2013;0.74). For these, error decreased almost monotonically with increasing autocorrelation, indicating substantially better accuracy for more temporally stable variables.</p><p>In a sensitivity analysis, we examined whether the initial filling of originally missing values influenced subsequent comparisons of imputation performance. Overall, MSE was lower for values that were missing in the original data and filled in to create the synthetic complete dataset, especially for LOCF and random forest imputation (2 methods used to fill in the original missingness). Still, both LOCF and random forest continued to achieve the best performance when imputing values not missing in the original data (<xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>).</p><p>In models for mean squared error, a significant 3-way interaction existed between imputation method, proportion missing, and variable group (Tables S4 and S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) due to the fact that (1) all methods except mean imputation degraded with increased missingness and (2) variables with a response of &#x201C;select all that apply&#x201D; were more poorly predicted (<xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>). 
There was no interaction between missingness mechanism and (1) imputation method or (2) variable group.</p></sec><sec id="s3-3"><title>Prediction Model Performance</title><sec id="s3-3-1"><title>Gradient Boosted Models</title><sec id="s3-3-1-1"><title>Extubation: Balanced Accuracy</title><p>For the outcome of extubation using gradient boosted models, balanced accuracy was the highest for LOCF (<xref ref-type="fig" rid="figure4">Figure 4</xref>). Mean imputation and no imputation (native support for missing values) performed almost as well. Random forest (both averaged and multiple imputation) also performed well at 0.5x and 1x missingness, but its performance degraded at 2x missingness. The amount of missingness was more influential than the missingness mechanism (Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Gradient boosted model test performance (extubation): balanced accuracy. Each point represents balanced accuracy for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. Balanced accuracy in the complete dataset is represented by a dashed line. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAR: missing at random; MCAR: missing completely at random; MI: multiple imputation; MNAR: missing not at random.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e79307_fig04.png"/></fig></sec><sec id="s3-3-1-2"><title>Extubation: AUC</title><p>Random forest multiple imputation had the highest AUC for gradient boosted models for 0.5x and 1x missingness, while LOCF did for 2x missingness (<xref ref-type="fig" rid="figure5">Figure 5</xref>). 
Random forest averaged, mean imputation, and no imputation had reasonable performance but degraded at 2x missingness. The amount of missingness continued to have a greater effect on performance than the missingness mechanism.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Gradient boosted model test performance (extubation): AUC. Each point represents AUC for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. AUC in the complete dataset is represented by a dashed line. Other performance outcome metrics (sensitivity, specificity, positive predictive value [PPV], negative predictive value [NPV], and F1) are presented in <xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAR: missing at random; MCAR: missing completely at random; MI: multiple imputation; MNAR: missing not at random.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e79307_fig05.png"/></fig></sec><sec id="s3-3-1-3"><title>Blood Pressure: MSE</title><p>LOCF had the lowest overall MSE (<xref ref-type="fig" rid="figure6">Figure 6</xref>). Random forest (both averaged and multiple imputation) generally had the next lowest MSE. Unlike in extubation models, mean imputation and no imputation did not perform substantially better than other methods. Performance was again more sensitive to amount of missingness than to missingness mechanism; overall, MSE increased in a stepwise fashion as missingness increased. 
However, there was less overall variability (CV=0.001) between models than for balanced accuracy (CV=0.042) and AUC (CV=0.012).</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Gradient boosted model test performance (blood pressure): mean squared error (MSE). Each point represents MSE for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. MSE in the complete dataset is represented by a dashed line. Other performance outcome metrics (mean absolute error [MAE], root MSE, and <italic>R</italic><sup>2</sup>) are presented in <xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAR: missing at random; MCAR: missing completely at random; MI: multiple imputation; MNAR: missing not at random.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e79307_fig06.png"/></fig></sec></sec></sec><sec id="s3-4"><title>LASSO Models</title><sec id="s3-4-1"><title>Extubation: Balanced Accuracy</title><p>For the LASSO outcome model, performance and patterns were similar to the gradient boosted model for balanced accuracy, with LOCF demonstrating top performance (Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>; <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>). Mean imputation did not perform as well in LASSO models. 
Interestingly, imputation methods that performed worst&#x2014;Bayesian/PMM and LASSO&#x2014;yielded better results in LASSO models than in gradient boosted models, with less variation in performance across methods (CV=0.029).</p></sec><sec id="s3-4-2"><title>Extubation: AUC</title><p>AUC followed similar patterns to balanced accuracy for LASSO. LOCF had the best performance. Mean imputation again performed worse comparatively, and there was a smaller performance gap between the best and worst imputation methods relative to gradient boosted models (CV=0.011; Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>; <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>).</p></sec><sec id="s3-4-3"><title>Blood Pressure: MSE</title><p>LOCF had the lowest overall MSE (<xref ref-type="supplementary-material" rid="app12">Multimedia Appendix 12</xref>). MSE again increased as missingness increased. MSE of LASSO models for all imputation methods was higher than gradient boosted models, although LASSO MSE was consistently closer to the MSE for complete data. The range between the best and worst imputation methods was narrower than for gradient boosted models (CV=0.0008).</p></sec></sec><sec id="s3-5"><title>Overfitting to Training Data: Both Models</title><sec id="s3-5-1"><title>Extubation: Balanced Accuracy and AUC</title><p>Overfitting to the training data (<xref ref-type="supplementary-material" rid="app13">Multimedia Appendices 13</xref> and <xref ref-type="supplementary-material" rid="app14">14</xref>) increased as proportion of missingness increased. It was greatest for the worst-performing imputation methods (Bayesian/PMM and LASSO) in both gradient-boosted and LASSO outcome models. 
It was higher overall for gradient boosted models than for LASSO outcome models.</p></sec><sec id="s3-5-2"><title>Blood Pressure: MSE</title><p>Gradient boosted models exhibited more overfitting overall and higher variability in overfitting than LASSO models (<xref ref-type="supplementary-material" rid="app15">Multimedia Appendices 15</xref> and <xref ref-type="supplementary-material" rid="app16">16</xref>).</p></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Overview</title><p>EHR data present challenges in how to handle missing data when developing clinical prediction models. It is critical to use methods that are transferable to new data when generating real-time predictions for use cases like clinical decision support tools. In a realistic EHR dataset, we compared imputation methods based on (1) imputation accuracy and (2) outcome prediction.</p></sec><sec id="s4-2"><title>Imputation Performance</title><p>LOCF and random forest multiple imputation consistently had the lowest MSE and classification error. LOCF has performed well in health survey and cohort datasets [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. In our data, per-variable LOCF imputation error decreased monotonically with first-order autocorrelation, indicating better performance for more temporally stable variables. Random forest, while not explicitly designed for longitudinal data, was informed by lagged variables in our implementation and therefore also showed some gains for more temporally stable variables, though the association was weaker than for LOCF. By contrast, mean imputation does not account for correlation across repeated measures, and accordingly showed no relationship between autocorrelation and error. 
In the PICU, where many measurements are recorded frequently and less frequent measurements often indicate presumed stability over time, LOCF may be particularly suitable, while random forest may provide added value when temporal patterns are weaker.</p><p>As expected, imputation performance degraded as the proportion of missingness increased. J&#x00E4;ger et al [<xref ref-type="bibr" rid="ref13">13</xref>] also found imputation performance generally worsened when difficulty (eg, higher missingness fraction and MNAR) increased. We found that missingness proportion affected imputation quality more than missingness mechanism. Although the imputation methods we employed are only theoretically valid in MCAR and MAR settings, performance in MNAR data was fairly similar, possibly due to the availability of repeated measurements over time, which may have captured underlying patterns. This is fortunate, given that EHR data are often likely MNAR [<xref ref-type="bibr" rid="ref9">9</xref>].</p></sec><sec id="s4-3"><title>Prediction Model Performance</title><p>Many studies do not report imputation performance [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>], focusing instead on prediction performance. In our experiment, LOCF generally yielded the highest prediction performance across outcome types and models. We evaluated whether LOCF&#x2019;s strong performance was influenced by how we constructed the synthetic, complete dataset. Although it may have been somewhat inflated, sensitivity analyses confirmed that LOCF remained a top performer. Its accuracy was greatest for variables with higher first-order autocorrelation, which may help explain its strong performance in this setting. 
While criticized in inferential statistics for causing bias and low standard errors [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], here it outperformed many multiple imputation methods.</p><p>Random forest multiple imputation was the best performing multiple imputation model. Jaeger et al [<xref ref-type="bibr" rid="ref27">27</xref>] also found it led to the best predictive performance across 12 imputation strategies with 2 different outcome models in a registry dataset. Perez-Lebel et al [<xref ref-type="bibr" rid="ref9">9</xref>] attributed multiple imputation&#x2019;s improved performance to ensembling (averaging multiple predictors) rather than accurately capturing the distribution of the missing values (the theoretical basis for its use in inferential statistics).</p><p>Native support for missing values (no imputation) yielded high balanced accuracy for gradient boosted models and reasonable performance for other metrics. Perez-Lebel et al [<xref ref-type="bibr" rid="ref9">9</xref>] concluded that it had the best predictive performance in real-world EHR data, with the lowest computational cost. LOCF (which they did not test) also has very little computational cost but may be less broadly applicable.</p><p>As with imputation performance, missingness mechanism had less impact on prediction performance than proportion of missingness. Performance degraded substantially as missingness fraction increased, consistent with prior findings [<xref ref-type="bibr" rid="ref13">13</xref>]. Interestingly, performance was similar across MAR, MCAR, and MNAR scenarios. Some studies suggest explicitly adding indicator variables for missingness to outcome models improves prediction in MNAR settings [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. 
However, others argue against them because they are fragile to operational and practice changes and may not generalize well to other settings [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. We therefore did not include such indicators, and performance in MNAR scenarios was comparable to MAR and MCAR scenarios. Machine learning models with native support for missing data may exploit missingness patterns for prediction. If no imputation is employed, missingness patterns should be closely monitored for drift and its effect on predictive performance [<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>Variability between imputation methods differed by model type and performance metric. LASSO outcome models showed less variability than gradient boosted models. In our experiment, imputation method appeared to have a greater impact on the binary outcome than the continuous outcome. Balanced accuracy had the highest variability between methods, followed by AUC; MSE exhibited substantially less variability than either. All imputation methods had lower MSE for gradient boosted models than LASSO models, indicating choice of outcome model mattered more than choice of imputation method.</p></sec><sec id="s4-4"><title>Relationship Between Imputation and Prediction Performance</title><p>The best performing methods for imputation&#x2014;LOCF and random forest multiple imputation&#x2014;also performed well in prediction. Thus, our main results for imputation and prediction performance were largely concordant. Mean imputation had the worst MSE but performed relatively well in gradient boosted models, possibly because constant imputation creates patterns that machine learning can exploit [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. Its performance dropped in LASSO models. 
Others have noted that more accurate imputation methods do not always yield better predictions [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref27">27</xref>], especially when features are weakly correlated [<xref ref-type="bibr" rid="ref9">9</xref>].</p><p>Overfitting in imputation models reduced imputation accuracy, which subsequently impacted the accuracy of outcome models. Outcome metrics that overfit more (eg, balanced accuracy in gradient boosted models) also showed greater variability between imputation methods in patterns that reflected differences in imputation accuracy.</p></sec><sec id="s4-5"><title>Imputation and Interpretability</title><p>For clinical decision support, it is important to assess how missing data handling affects interpretability. The full promise of artificial intelligence will not be realized if it is not deemed trustworthy and transparent by humans [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. Mean imputation, the most common method of imputation in machine learning clinical prediction models [<xref ref-type="bibr" rid="ref12">12</xref>], may reduce interpretability for clinicians trying to understand predictions for individual patients, as the mean is not meaningful at the individual level. Native support by algorithms may improve interpretability by relying only on recorded values. LOCF is simple to implement and aligns with clinical reasoning&#x2014;if a measurement is expected to change and that change is important, it will be remeasured if possible. 
Finally, if complex methods like random forests (while less interpretable themselves) generate accurate imputations that reflect a biological or clinical relationship between the predictor and outcome, this could lead to more interpretable outcome models that are also more robust over time and across populations [<xref ref-type="bibr" rid="ref9">9</xref>].</p></sec><sec id="s4-6"><title>Limitations</title><p>The primary limitation of this experiment is its reliance on both a single dataset and a single method for generating the complete dataset on which the analysis is based. However, a key strength is the high fidelity of the data, with detailed, frequent measurements typical of the intensive care unit. This contrasts with EHR data from settings like primary care, where visits may be months or years apart. Although we predicted both a binary and continuous outcome, our dataset was limited to a single PICU at one academic medical center. Raw data were transformed into a structured format with time windows and summary variables, a common approach for EHR data [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Thus, our findings may not generalize to other settings or data structures.</p><p>We restricted imputation methods in our experiment to those with readily available packages in R that allowed model training on 1 dataset and subsequent imputation in new data. Deep learning methods were not included, nor were novel approaches [<xref ref-type="bibr" rid="ref47">47</xref>-<xref ref-type="bibr" rid="ref49">49</xref>] that may outperform tested methods but are more complex to implement for applied practitioners. 
Performance could also theoretically be improved by combining imputation approaches (eg, a SuperLearner [<xref ref-type="bibr" rid="ref50">50</xref>]).</p><p>Most existing imputation packages do not allow users to save model parameters to apply on new data, limiting the methods available [<xref ref-type="bibr" rid="ref15">15</xref>]. Some have proposed workarounds, such as stacking data from a new patient with all training data and rerunning multiple imputation models [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. However, this was computationally infeasible in a dataset of our size. Privacy concerns may also prevent access to training data in deployment. Even packages like <italic>mice</italic> that allow imputation on single new cases have limitations&#x2014;each call refits an iteration of the model, making real-time imputation for new patients infeasible due to speed. The lack of scalable imputation tools remains a barrier to progress in deploying real-time clinical prediction models.</p></sec><sec id="s4-7"><title>Conclusion</title><p>When using EHR data with frequent measurements to build a prediction model, LOCF offers reasonable performance with simple implementation. Native support for missing data in machine learning models, such as gradient boosted trees, is the least computationally intensive approach, with decent performance and potentially broader applicability than LOCF. While multiple imputation is the gold standard for inferential models, it is extremely computationally intensive, may not be optimal for prediction models, and may not be feasible in real time. 
As clinical prediction models continue to integrate into real-time patient care, addressing missing data appropriately remains essential.</p></sec></sec></body><back><ack><p>ChatGPT was used for assistance in writing R code and to make minor copyedits to the manuscript for conciseness and clarity.</p></ack><notes><sec><title>Funding</title><p>This work was supported by NIH F31HL156498 (National Heart, Lung, and Blood Institute, Digitale).</p></sec><sec><title>Data Availability</title><p>The data underlying this article cannot be shared publicly because they contain protected health information.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: CEM (equal), DF (equal), EDG (equal), JD (lead), MJP (equal)</p><p>Data curation: DF (supporting), JD (lead)</p><p>Formal analysis: JD (lead)</p><p>Funding acquisition: CEM (supporting), DF (supporting), EDG (supporting), JD (lead), MJP (supporting)</p><p>Methodology: CEM (equal), DF (equal), EDG (equal), JD (lead), MJP (equal)</p><p>Resources: JD (lead), MJP (supporting)</p><p>Software: EDG (supporting), JD (lead)</p><p>Supervision: CEM (equal), DF (equal), EDG (equal), MJP (equal)</p><p>Validation: JD (lead)</p><p>Visualization: JD (lead)</p><p>Writing &#x2013; original draft: JD (lead)</p><p>Writing &#x2013; review &#x0026; editing: CEM (supporting), DF (supporting), EDG (supporting), JD (lead), MJP (supporting)</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb2">CV</term><def><p>coefficient of variation</p></def></def-item><def-item><term id="abb3">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb4">ETT</term><def><p>endotracheal tube</p></def></def-item><def-item><term id="abb5">LASSO</term><def><p>least absolute shrinkage and selection 
operator</p></def></def-item><def-item><term id="abb6">LOCF</term><def><p>last observation carried forward</p></def></def-item><def-item><term id="abb7">MAR</term><def><p>missing at random</p></def></def-item><def-item><term id="abb8">MCAR</term><def><p>missing completely at random</p></def></def-item><def-item><term id="abb9">MI</term><def><p>multiple imputation</p></def></def-item><def-item><term id="abb10">MNAR</term><def><p>missing not at random</p></def></def-item><def-item><term id="abb11">MSE</term><def><p>mean squared error</p></def></def-item><def-item><term id="abb12">PICU</term><def><p>pediatric intensive care unit</p></def></def-item><def-item><term id="abb13">PMM</term><def><p>predictive mean matching</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wells</surname><given-names>BJ</given-names> </name><name name-style="western"><surname>Chagin</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Li</surname><given-names>L</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>B</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kattan</surname><given-names>MW</given-names> </name></person-group><article-title>Using the landmark method for creating prediction models in large datasets derived from electronic health records</article-title><source>Health Care Manag Sci</source><year>2015</year><month>03</month><volume>18</volume><issue>1</issue><fpage>86</fpage><lpage>92</lpage><pub-id pub-id-type="doi">10.1007/s10729-014-9281-3</pub-id><pub-id pub-id-type="medline">24752545</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Tang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Davarmanesh</surname><given-names>P</given-names> </name><name name-style="western"><surname>Song</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Koutra</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sjoding</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Wiens</surname><given-names>J</given-names> </name></person-group><article-title>Democratizing EHR analyses with FIDDLE: a flexible data-driven preprocessing pipeline for structured clinical data</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>12</month><day>9</day><volume>27</volume><issue>12</issue><fpage>1921</fpage><lpage>1934</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaa139</pub-id><pub-id pub-id-type="medline">33040151</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rubin</surname><given-names>DB</given-names> </name></person-group><article-title>Inference and missing data</article-title><source>Biometrika</source><year>1976</year><volume>63</volume><issue>3</issue><fpage>581</fpage><lpage>592</lpage><pub-id pub-id-type="doi">10.1093/biomet/63.3.581</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sperrin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>GP</given-names> </name><name name-style="western"><surname>Sisk</surname><given-names>R</given-names> </name><name name-style="western"><surname>Peek</surname><given-names>N</given-names> </name></person-group><article-title>Missing data should be handled differently for prediction than for description 
or causal explanation</article-title><source>J Clin Epidemiol</source><year>2020</year><month>09</month><volume>125</volume><fpage>183</fpage><lpage>187</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2020.03.028</pub-id><pub-id pub-id-type="medline">32540389</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Getzen</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ungar</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mowery</surname><given-names>D</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Long</surname><given-names>Q</given-names> </name></person-group><article-title>Mining for equitable health: assessing the impact of missing data in electronic health records</article-title><source>J Biomed Inform</source><year>2023</year><month>03</month><volume>139</volume><fpage>104269</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2022.104269</pub-id><pub-id pub-id-type="medline">36621750</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lachin</surname><given-names>JM</given-names> </name></person-group><article-title>Fallacies of last observation carried forward analyses</article-title><source>Clin Trials</source><year>2016</year><month>04</month><volume>13</volume><issue>2</issue><fpage>161</fpage><lpage>168</lpage><pub-id pub-id-type="doi">10.1177/1740774515602688</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Buuren</surname><given-names>S</given-names></name></person-group><source>Flexible Imputation of Missing 
Data</source><year>2018</year><access-date>2022-11-04</access-date><edition>2</edition><publisher-name>CRC Press</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://stefvanbuuren.name/fimd/sec-pmm.html">https://stefvanbuuren.name/fimd/sec-pmm.html</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wood</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Royston</surname><given-names>P</given-names> </name><name name-style="western"><surname>White</surname><given-names>IR</given-names> </name></person-group><article-title>The estimation and use of predictions for the assessment of model performance using large samples with multiply imputed data</article-title><source>Biom J</source><year>2015</year><month>07</month><volume>57</volume><issue>4</issue><fpage>614</fpage><lpage>632</lpage><pub-id pub-id-type="doi">10.1002/bimj.201400004</pub-id><pub-id pub-id-type="medline">25630926</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Perez-Lebel</surname><given-names>A</given-names> </name><name name-style="western"><surname>Varoquaux</surname><given-names>G</given-names> </name><name name-style="western"><surname>Le Morvan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Josse</surname><given-names>J</given-names> </name><name name-style="western"><surname>Poline</surname><given-names>JB</given-names> </name></person-group><article-title>Benchmarking missing-values approaches for predictive models on health databases</article-title><source>GigaScience</source><year>2022</year><month>04</month><day>15</day><volume>11</volume><fpage>giac013</fpage><pub-id pub-id-type="doi">10.1093/gigascience/giac013</pub-id><pub-id 
pub-id-type="medline">35426912</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sisk</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sperrin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Peek</surname><given-names>N</given-names> </name><name name-style="western"><surname>van Smeden</surname><given-names>M</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>GP</given-names> </name></person-group><article-title>Imputation and missing indicators for handling missing data in the development and deployment of clinical prediction models: a simulation study</article-title><source>Stat Methods Med Res</source><year>2023</year><month>08</month><volume>32</volume><issue>8</issue><fpage>1461</fpage><lpage>1477</lpage><pub-id pub-id-type="doi">10.1177/09622802231165001</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tsvetanova</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sperrin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Peek</surname><given-names>N</given-names> </name><name name-style="western"><surname>Buchan</surname><given-names>I</given-names> </name><name name-style="western"><surname>Hyland</surname><given-names>S</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>GP</given-names> </name></person-group><article-title>Missing data was handled inconsistently in UK prediction models: a review of method used</article-title><source>J Clin Epidemiol</source><year>2021</year><month>12</month><volume>140</volume><fpage>149</fpage><lpage>158</lpage><pub-id 
pub-id-type="doi">10.1016/j.jclinepi.2021.09.008</pub-id><pub-id pub-id-type="medline">34520847</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nijman</surname><given-names>S</given-names> </name><name name-style="western"><surname>Leeuwenberg</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Beekers</surname><given-names>I</given-names> </name><etal/></person-group><article-title>Missing data is poorly handled and reported in prediction model studies using machine learning: a literature review</article-title><source>J Clin Epidemiol</source><year>2022</year><month>02</month><volume>142</volume><fpage>218</fpage><lpage>229</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2021.11.023</pub-id><pub-id pub-id-type="medline">34798287</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>J&#x00E4;ger</surname><given-names>S</given-names> </name><name name-style="western"><surname>Allhorn</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bie&#x00DF;mann</surname><given-names>F</given-names> </name></person-group><article-title>A benchmark for data imputation methods</article-title><source>Front Big Data</source><year>2021</year><volume>4</volume><fpage>693674</fpage><pub-id pub-id-type="doi">10.3389/fdata.2021.693674</pub-id><pub-id pub-id-type="medline">34308343</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Breiman</surname><given-names>L</given-names> </name><name name-style="western"><surname>Friedman</surname><given-names>J</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>C</given-names> 
</name><name name-style="western"><surname>Olshen</surname><given-names>R</given-names> </name></person-group><source>Classification and Regression Trees</source><year>1984</year><access-date>2025-11-10</access-date><publisher-name>Wadsworth &#x0026; Brooks</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.taylorfrancis.com/books/mono/10.1201/9781315139470/classification-regression-trees-leo-breiman-jerome-friedman-olshen-charles-stone">https://www.taylorfrancis.com/books/mono/10.1201/9781315139470/classification-regression-trees-leo-breiman-jerome-friedman-olshen-charles-stone</ext-link></comment><pub-id pub-id-type="other">9781315139470</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hoogland</surname><given-names>J</given-names> </name><name name-style="western"><surname>van Barreveld</surname><given-names>M</given-names> </name><name name-style="western"><surname>Debray</surname><given-names>TPA</given-names> </name><etal/></person-group><article-title>Handling missing predictor values when validating and applying a prediction model to new patients</article-title><source>Stat Med</source><year>2020</year><month>11</month><day>10</day><volume>39</volume><issue>25</issue><fpage>3591</fpage><lpage>3607</lpage><pub-id pub-id-type="doi">10.1002/sim.8682</pub-id><pub-id pub-id-type="medline">32687233</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nijman</surname><given-names>SWJ</given-names> </name><name name-style="western"><surname>Hoogland</surname><given-names>J</given-names> </name><name name-style="western"><surname>Groenhof</surname><given-names>TKJ</given-names> </name><etal/></person-group><article-title>On behalf of the UCC-CVRM and UCC-SMART study groups. 
Real-time imputation of missing predictor values in clinical practice</article-title><source>Eur Heart J - Digit Health</source><year>2021</year><month>03</month><day>1</day><volume>2</volume><issue>1</issue><fpage>154</fpage><lpage>164</lpage><pub-id pub-id-type="doi">10.1093/ehjdh/ztaa016</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Janssen</surname><given-names>KJM</given-names> </name><name name-style="western"><surname>Vergouwe</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Donders</surname><given-names>ART</given-names> </name><etal/></person-group><article-title>Dealing with missing predictor values when applying clinical prediction models</article-title><source>Clin Chem</source><year>2009</year><month>05</month><volume>55</volume><issue>5</issue><fpage>994</fpage><lpage>1001</lpage><pub-id pub-id-type="doi">10.1373/clinchem.2008.115345</pub-id><pub-id pub-id-type="medline">19282357</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ribeiro</surname><given-names>C</given-names> </name><name name-style="western"><surname>Freitas</surname><given-names>AA</given-names> </name></person-group><article-title>A data-driven missing value imputation approach for longitudinal datasets</article-title><source>Artif Intell Rev</source><year>2021</year><month>12</month><volume>54</volume><issue>8</issue><fpage>6277</fpage><lpage>6307</lpage><pub-id pub-id-type="doi">10.1007/s10462-021-09963-5</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rosner</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>Cook</surname><given-names>N</given-names> </name><name name-style="western"><surname>Portman</surname><given-names>R</given-names> </name><name name-style="western"><surname>Daniels</surname><given-names>S</given-names> </name><name name-style="western"><surname>Falkner</surname><given-names>B</given-names> </name></person-group><article-title>Determination of blood pressure percentiles in normal-weight children: some methodological issues</article-title><source>Am J Epidemiol</source><year>2008</year><month>03</month><day>15</day><volume>167</volume><issue>6</issue><fpage>653</fpage><lpage>666</lpage><pub-id pub-id-type="doi">10.1093/aje/kwm348</pub-id><pub-id pub-id-type="medline">18230679</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Engels</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Diehr</surname><given-names>P</given-names> </name></person-group><article-title>Imputation of missing longitudinal data: a comparison of methods</article-title><source>J Clin Epidemiol</source><year>2003</year><month>10</month><volume>56</volume><issue>10</issue><fpage>968</fpage><lpage>976</lpage><pub-id pub-id-type="doi">10.1016/s0895-4356(03)00170-7</pub-id><pub-id pub-id-type="medline">14568628</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Ahmed</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Boyer</surname><given-names>TJ</given-names> </name></person-group><source>Endotracheal Tube</source><year>2023</year><publisher-name>StatPearls Publishing</publisher-name><pub-id pub-id-type="medline">30969569</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><person-group 
person-group-type="author"><name name-style="western"><surname>Mayer</surname><given-names>M</given-names> </name></person-group><source>missRanger: fast imputation of missing values</source><year>2023</year><access-date>2025-10-25</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=missRanger">https://CRAN.R-project.org/package=missRanger</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><article-title>A language and environment for statistical computing</article-title><source>R Core Team</source><access-date>2025-11-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.R-project.org">https://www.R-project.org</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schouten</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Lugtig</surname><given-names>P</given-names> </name><name name-style="western"><surname>Vink</surname><given-names>G</given-names> </name></person-group><article-title>Generating missing values for simulation purposes: a multivariate amputation procedure</article-title><source>J Stat Comput Simul</source><year>2018</year><month>10</month><day>13</day><volume>88</volume><issue>15</issue><fpage>2909</fpage><lpage>2930</lpage><pub-id pub-id-type="doi">10.1080/00949655.2018.1491577</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huque</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Carlin</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Simpson</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>KJ</given-names> 
</name></person-group><article-title>A comparison of multiple imputation methods for missing data in longitudinal studies</article-title><source>BMC Med Res Methodol</source><year>2018</year><month>12</month><day>12</day><volume>18</volume><issue>1</issue><fpage>168</fpage><pub-id pub-id-type="doi">10.1186/s12874-018-0615-6</pub-id><pub-id pub-id-type="medline">30541455</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>XS</given-names> </name><name name-style="western"><surname>Chaudhary</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Imputation of missing values for electronic health record laboratory data</article-title><source>NPJ Digit Med</source><year>2021</year><month>10</month><day>11</day><volume>4</volume><issue>1</issue><fpage>1</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1038/s41746-021-00518-0</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jaeger</surname><given-names>BC</given-names> </name><name name-style="western"><surname>Cantor</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sthanam</surname><given-names>V</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kirklin</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Rudraraju</surname><given-names>R</given-names> </name></person-group><article-title>Improving outcome predictions for patients receiving mechanical circulatory support by optimizing imputation of missing values</article-title><source>Circ Cardiovasc Qual 
Outcomes</source><year>2021</year><month>09</month><volume>14</volume><issue>9</issue><fpage>e007071</fpage><pub-id pub-id-type="doi">10.1161/CIRCOUTCOMES.120.007071</pub-id><pub-id pub-id-type="medline">34517728</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shah</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Bartlett</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Carpenter</surname><given-names>J</given-names> </name><name name-style="western"><surname>Nicholas</surname><given-names>O</given-names> </name><name name-style="western"><surname>Hemingway</surname><given-names>H</given-names> </name></person-group><article-title>Comparison of random forest and parametric imputation models for imputing missing data using MICE: a CALIBER study</article-title><source>Am J Epidemiol</source><year>2014</year><month>03</month><day>15</day><volume>179</volume><issue>6</issue><fpage>764</fpage><lpage>774</lpage><pub-id pub-id-type="doi">10.1093/aje/kwt312</pub-id><pub-id pub-id-type="medline">24589914</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Buuren</surname><given-names>SV</given-names> </name><name name-style="western"><surname>Groothuis-Oudshoorn</surname><given-names>K</given-names> </name></person-group><article-title>mice: multivariate imputation by chained equations in R</article-title><source>J Stat Softw</source><year>2011</year><volume>45</volume><issue>3</issue><pub-id pub-id-type="doi">10.18637/jss.v045.i03</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Friedman</surname><given-names>JH</given-names> 
</name></person-group><article-title>Greedy function approximation: a gradient boosting machine</article-title><source>Ann Statist</source><year>2001</year><volume>29</volume><issue>5</issue><fpage>1180</fpage><lpage>1232</lpage><pub-id pub-id-type="doi">10.1214/aos/1013203451</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Shi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ke</surname><given-names>G</given-names> </name><name name-style="western"><surname>Soukhavong</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Lightgbm: light gradient boosting machine</article-title><year>2023</year><access-date>2025-10-25</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=lightgbm">https://CRAN.R-project.org/package=lightgbm</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Olson</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Cava</surname><given-names>WL</given-names> </name><name name-style="western"><surname>Mustahsan</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Varik</surname><given-names>A</given-names> </name><name name-style="western"><surname>Moore</surname><given-names>JH</given-names> </name></person-group><article-title>Data-driven advice for applying machine learning to bioinformatics problems</article-title><source>Biocomput 2018 World Scientific</source><year>2018</year><fpage>192</fpage><lpage>203</lpage><pub-id pub-id-type="doi">10.1142/9789813235533_0018</pub-id><pub-id pub-id-type="other">978-981-323-552-6</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="thesis"><person-group 
person-group-type="author"><name name-style="western"><surname>Gennatas</surname><given-names>ED</given-names> </name></person-group><article-title>Towards precision psychiatry: gray matter development and cognition in adolescence</article-title><year>2017</year><access-date>2025-11-04</access-date><publisher-name>University of Pennsylvania</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://repository.upenn.edu/entities/person/8f68097c-4130-40bd-9e6d-453632bbfe1e">https://repository.upenn.edu/entities/person/8f68097c-4130-40bd-9e6d-453632bbfe1e</ext-link></comment></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Ke</surname><given-names>G</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Finley</surname><given-names>T</given-names> </name><etal/></person-group><source>LightGBM: a highly efficient gradient boosting decision tree</source><year>2017</year><access-date>2023-12-22</access-date><publisher-name>Adv Neural Inf Process Syst Curran Associates, Inc</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://papers.nips.cc/paper_files/paper/2017/hash/6449f44a102fde848669bdd9eb6b76fa-Abstract.html">https://papers.nips.cc/paper_files/paper/2017/hash/6449f44a102fde848669bdd9eb6b76fa-Abstract.html</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Friedman</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hastie</surname><given-names>T</given-names> </name><name name-style="western"><surname>Tibshirani</surname><given-names>R</given-names> </name></person-group><article-title>Regularization paths for generalized linear models via coordinate 
descent</article-title><source>J Stat Softw</source><year>2010</year><volume>33</volume><issue>1</issue><fpage>1</fpage><lpage>22</lpage><pub-id pub-id-type="medline">20808728</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Brodersen</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Ong</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Stephan</surname><given-names>KE</given-names> </name><name name-style="western"><surname>Buhmann</surname><given-names>JM</given-names> </name></person-group><article-title>The balanced accuracy and its posterior distribution</article-title><year>2010</year><conf-name>2010 20th Int Conf Pattern Recognit</conf-name><fpage>3121</fpage><lpage>3124</lpage><pub-id pub-id-type="doi">10.1109/ICPR.2010.764</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Binuya</surname><given-names>MAE</given-names> </name><name name-style="western"><surname>Engelhardt</surname><given-names>EG</given-names> </name><name name-style="western"><surname>Schats</surname><given-names>W</given-names> </name><name name-style="western"><surname>Schmidt</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Steyerberg</surname><given-names>EW</given-names> </name></person-group><article-title>Methodological guidance for the evaluation and updating of clinical prediction models: a systematic review</article-title><source>BMC Med Res Methodol</source><year>2022</year><month>12</month><day>12</day><volume>22</volume><issue>1</issue><fpage>316</fpage><pub-id pub-id-type="doi">10.1186/s12874-022-01801-8</pub-id><pub-id pub-id-type="medline">36510134</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hicks</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Str&#x00FC;mke</surname><given-names>I</given-names> </name><name name-style="western"><surname>Thambawita</surname><given-names>V</given-names> </name><etal/></person-group><article-title>On evaluation metrics for medical applications of artificial intelligence</article-title><source>Sci Rep</source><year>2022</year><month>08</month><volume>12</volume><issue>1</issue><fpage>5979</fpage><pub-id pub-id-type="doi">10.1038/s41598-022-09954-8</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Efthimiou</surname><given-names>O</given-names> </name><name name-style="western"><surname>Seo</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chalkou</surname><given-names>K</given-names> </name><name name-style="western"><surname>Debray</surname><given-names>T</given-names> </name><name name-style="western"><surname>Egger</surname><given-names>M</given-names> </name><name name-style="western"><surname>Salanti</surname><given-names>G</given-names> </name></person-group><article-title>Developing clinical prediction models: a step-by-step guide</article-title><source>BMJ</source><year>2024</year><month>09</month><day>3</day><fpage>e078276</fpage><pub-id pub-id-type="doi">10.1136/bmj-2023-078276</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Little</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>D&#x2019;Agostino</surname><given-names>R</given-names> </name><name name-style="western"><surname>Cohen</surname><given-names>ML</given-names> 
</name><etal/></person-group><article-title>The prevention and treatment of missing data in clinical trials</article-title><source>N Engl J Med</source><year>2012</year><month>10</month><day>4</day><volume>367</volume><issue>14</issue><fpage>1355</fpage><lpage>1360</lpage><pub-id pub-id-type="doi">10.1056/NEJMsr1203730</pub-id><pub-id pub-id-type="medline">23034025</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sharafoddini</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dubin</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Maslove</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>J</given-names> </name></person-group><article-title>A new insight into missing data in intensive care unit patient profiles: observational study</article-title><source>JMIR Med Inform</source><year>2019</year><month>01</month><day>8</day><volume>7</volume><issue>1</issue><fpage>e11605</fpage><pub-id pub-id-type="doi">10.2196/11605</pub-id><pub-id pub-id-type="medline">30622091</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Smeden</surname><given-names>M</given-names> </name><name name-style="western"><surname>Groenwold</surname><given-names>RHH</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KG</given-names> </name></person-group><article-title>A cautionary note on the use of the missing indicator method for handling missing data in prediction research</article-title><source>J Clin Epidemiol</source><year>2020</year><month>09</month><volume>125</volume><fpage>188</fpage><lpage>190</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2020.06.007</pub-id><pub-id 
pub-id-type="medline">32565213</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Josse</surname><given-names>J</given-names> </name><name name-style="western"><surname>Prost</surname><given-names>N</given-names> </name><name name-style="western"><surname>Scornet</surname><given-names>E</given-names> </name><name name-style="western"><surname>Varoquaux</surname><given-names>G</given-names> </name></person-group><article-title>On the consistency of supervised learning with missing values</article-title><source>arXiv</source><comment>Preprint posted online in 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.1902.06931</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Payrovnaziri</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Rengifo-Moreno</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Explainable artificial intelligence models using real-world electronic health record data: a systematic scoping review</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>07</month><day>1</day><volume>27</volume><issue>7</issue><fpage>1173</fpage><lpage>1185</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaa053</pub-id><pub-id pub-id-type="medline">32417928</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shortliffe</surname><given-names>EH</given-names> </name><name name-style="western"><surname>Sep&#x00FA;lveda</surname><given-names>MJ</given-names> </name></person-group><article-title>Clinical decision support in the era of 
artificial intelligence</article-title><source>JAMA</source><year>2018</year><month>12</month><day>4</day><volume>320</volume><issue>21</issue><fpage>2199</fpage><lpage>2200</lpage><pub-id pub-id-type="doi">10.1001/jama.2018.17163</pub-id><pub-id pub-id-type="medline">30398550</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gallamoza</surname><given-names>B</given-names> </name><name name-style="western"><surname>Cutrona</surname><given-names>N</given-names> </name><name name-style="western"><surname>Dhakal</surname><given-names>P</given-names> </name><name name-style="western"><surname>Poulain</surname><given-names>R</given-names> </name><name name-style="western"><surname>Beheshti</surname><given-names>R</given-names> </name></person-group><article-title>An extensive data processing pipeline for MIMIC-IV</article-title><source>Proc Mach Learn Res</source><year>2022</year><month>11</month><volume>193</volume><fpage>311</fpage><lpage>325</lpage><pub-id pub-id-type="medline">36686986</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luo</surname><given-names>Y</given-names> </name></person-group><article-title>Evaluating the state of the art in missing data imputation for clinical data</article-title><source>Brief Bioinform</source><year>2022</year><month>01</month><day>17</day><volume>23</volume><issue>1</issue><fpage>bbab489</fpage><pub-id pub-id-type="doi">10.1093/bib/bbab489</pub-id><pub-id pub-id-type="medline">34882223</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Kazijevs</surname><given-names>M</given-names> </name><name name-style="western"><surname>Samad</surname><given-names>MD</given-names> </name></person-group><article-title>Deep imputation of missing values in time series health data: a review with benchmarking</article-title><source>J Biomed Inform</source><year>2023</year><month>08</month><volume>144</volume><fpage>104440</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2023.104440</pub-id><pub-id pub-id-type="medline">37429511</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yuan</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Handling missing values in healthcare data: a systematic review of deep learning-based imputation techniques</article-title><source>Artif Intell Med</source><year>2023</year><month>08</month><volume>142</volume><fpage>102587</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2023.102587</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van der Laan</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Polley</surname><given-names>EC</given-names> </name><name name-style="western"><surname>Hubbard</surname><given-names>AE</given-names> </name></person-group><article-title>Super learner</article-title><source>Stat Appl Genet Mol Biol</source><year>2007</year><volume>6</volume><issue>1</issue><pub-id pub-id-type="doi">10.2202/1544-6115.1309</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary methods and 
tables.</p><media xlink:href="medinform_v13i1e79307_app1.pdf" xlink:title="PDF File, 727 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Imputation test performance metrics (blood pressure): mean squared error. Each point represents mean squared error calculated for 176 numeric variables for 1 of 300 datasets created for the outcome of blood pressure. There are 20 datasets per missingness scenario and imputation type represented in each box plot. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward.</p><media xlink:href="medinform_v13i1e79307_app2.pdf" xlink:title="PDF File, 126 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Imputation test performance metrics (extubation and blood pressure): classification error. Each point represents classification error calculated for 6 categorical variables for 1 dataset (of 300 created for the outcome of extubation and 300 created for the outcome of blood pressure). There are 20 datasets per missingness scenario and imputation type represented in each box plot. The mice implementation of LASSO cannot accommodate multiclass categorical outcomes; thus, we used a simple classification tree. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward.</p><media xlink:href="medinform_v13i1e79307_app3.pdf" xlink:title="PDF File, 249 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Imputation performance difference between train and test (extubation and blood pressure): mean squared error. 
Each point represents the difference in mean squared error between training and test sets calculated for 176 numeric variables for 1 dataset (of 300 created for the outcome of extubation and 300 created for the outcome of blood pressure). There are 20 datasets per missingness scenario and imputation type represented in each box plot. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward.</p><media xlink:href="medinform_v13i1e79307_app4.pdf" xlink:title="PDF File, 256 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Imputation performance difference between train and test (extubation and blood pressure): classification error. Each point represents the difference in classification error between training and test sets calculated for 6 categorical variables for 1 dataset (of 300 created for the outcome of extubation and 300 created for the outcome of blood pressure). There are 20 datasets per missingness scenario and imputation type represented in each box plot. The mice implementation of LASSO cannot accommodate multiclass categorical outcomes; thus, we used a simple classification tree. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward.</p><media xlink:href="medinform_v13i1e79307_app5.pdf" xlink:title="PDF File, 255 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Autocorrelation versus imputation error by method (extubation). 
Scatterplot of per-variable temporal autocorrelation (AR(1) coefficient, <italic>x</italic>-axis) versus imputation error (<italic>y</italic>-axis; mean squared error (MSE) for numeric variables, classification error for categorical) using 4-hour windows. Each point represents 1 predictor; higher persistence corresponds to lower error for last observation carried forward (LOCF).</p><media xlink:href="medinform_v13i1e79307_app6.pdf" xlink:title="PDF File, 22 KB"/></supplementary-material><supplementary-material id="app7"><label>Multimedia Appendix 7</label><p>Imputation test performance metrics by missingness in original data (extubation and blood pressure): mean squared error. Each point represents mean squared error calculated for 176 numeric variables for 1 dataset stratified by whether the value was missing in the original data (of 300 created for the outcome of extubation and 300 created for the outcome of blood pressure). There are 20 datasets per missingness scenario and imputation type represented in each box plot. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward.</p><media xlink:href="medinform_v13i1e79307_app7.pdf" xlink:title="PDF File, 768 KB"/></supplementary-material><supplementary-material id="app8"><label>Multimedia Appendix 8</label><p>Marginal means for interaction between imputation method, proportion missing, and variable group (extubation and blood pressure). We calculated marginal means for interaction from linear models of mean squared error comparing imputed values to complete dataset (for each outcome: 1 observation [n=264,000] per 176 variables per 300 datasets per 5 imputation methods). 
We calculated marginal means for interaction from linear models of classification error comparing imputed values to complete dataset (for each outcome: 1 observation [n=9000] per 6 variables per 300 datasets per 5 imputation methods) with random intercepts for each of the 1500 datasets. We included all 3-way and 2-way interactions and completed a backward stepwise elimination procedure (included <italic>P</italic>&#x003C;.05) to determine the final model. Variable group 4, unlike other variable groups, included mostly indicator variables constructed from &#x201C;select all that apply&#x201D; responses in the electronic health record (EHR) (see Table S1). Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward.</p><media xlink:href="medinform_v13i1e79307_app8.pdf" xlink:title="PDF File, 76 KB"/></supplementary-material><supplementary-material id="app9"><label>Multimedia Appendix 9</label><p>Gradient-boosted model test performance (extubation): other metrics. Each point represents performance for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. Performance in the complete dataset is represented by a dashed line. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MI: multiple imputation; NPV: negative predictive value; PPV: positive predictive value.</p><media xlink:href="medinform_v13i1e79307_app9.pdf" xlink:title="PDF File, 1010 KB"/></supplementary-material><supplementary-material id="app10"><label>Multimedia Appendix 10</label><p>Gradient boosted model test performance (blood pressure): other metrics. Each point represents performance for 1 dataset. 
There are 20 datasets per missingness scenario and imputation type represented in each box plot. Performance in the complete dataset is represented by a dashed line. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAE: mean absolute error; MI: multiple imputation; NPV: negative predictive value; PPV: positive predictive value; RMSE: root mean squared error.</p><media xlink:href="medinform_v13i1e79307_app10.pdf" xlink:title="PDF File, 617 KB"/></supplementary-material><supplementary-material id="app11"><label>Multimedia Appendix 11</label><p>LASSO test performance (extubation): all metrics. Each point represents performance for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. Performance in the complete dataset is represented by a dashed line. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAE: mean absolute error; MI: multiple imputation; NPV: negative predictive value; PPV: positive predictive value; RMSE: root mean squared error.</p><media xlink:href="medinform_v13i1e79307_app11.pdf" xlink:title="PDF File, 1232 KB"/></supplementary-material><supplementary-material id="app12"><label>Multimedia Appendix 12</label><p>LASSO test performance (blood pressure): all metrics. Each point represents performance for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. Performance in the complete dataset is represented by a dashed line. 
AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAE: mean absolute error; MI: multiple imputation; MSE: mean squared error; NPV: negative predictive value; PPV: positive predictive value; RMSE: root mean squared error.</p><media xlink:href="medinform_v13i1e79307_app12.pdf" xlink:title="PDF File, 719 KB"/></supplementary-material><supplementary-material id="app13"><label>Multimedia Appendix 13</label><p>Gradient-boosted model difference between train and test (extubation): all performance metrics. Each point represents a difference in performance between train and test sets for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. The difference in the complete dataset is represented by a dashed line. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAE: mean absolute error; MI: multiple imputation; MSE: mean squared error; NPV: negative predictive value; PPV: positive predictive value; RMSE: root mean squared error.</p><media xlink:href="medinform_v13i1e79307_app13.pdf" xlink:title="PDF File, 1428 KB"/></supplementary-material><supplementary-material id="app14"><label>Multimedia Appendix 14</label><p>LASSO model difference between train and test (extubation): all performance metrics. Each point represents a difference in performance between train and test sets for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. The difference in the complete dataset is represented by a dashed line. 
AUC: area under the receiver operating characteristic curve; AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAE: mean absolute error; MI: multiple imputation; MSE: mean squared error; NPV: negative predictive value; PPV: positive predictive value; RMSE: root mean squared error.</p><media xlink:href="medinform_v13i1e79307_app14.pdf" xlink:title="PDF File, 1247 KB"/></supplementary-material><supplementary-material id="app15"><label>Multimedia Appendix 15</label><p>Gradient boosted model difference between train and test (blood pressure): all performance metrics. Each point represents a difference in performance between train and test sets for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. The difference in the complete dataset is represented by a dashed line. AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAE: mean absolute error; MI: multiple imputation; MSE: mean squared error; NPV: negative predictive value; PPV: positive predictive value; RMSE: root mean squared error.</p><media xlink:href="medinform_v13i1e79307_app15.pdf" xlink:title="PDF File, 819 KB"/></supplementary-material><supplementary-material id="app16"><label>Multimedia Appendix 16</label><p>LASSO model difference between train and test (blood pressure): all performance metrics. Each point represents a difference in performance between train and test sets for 1 dataset. There are 20 datasets per missingness scenario and imputation type represented in each box plot. The difference in the complete dataset is represented by a dashed line. 
AV: average (average of 30 imputations); Bayesian/PMM: Bayesian imputation under the normal linear model with predictive mean matching; LASSO: least absolute shrinkage and selection operator; LOCF: last observation carried forward; MAE: mean absolute error; MI: multiple imputation; MSE: mean squared error; NPV: negative predictive value; PPV: positive predictive value; RMSE: root mean squared error.</p><media xlink:href="medinform_v13i1e79307_app16.pdf" xlink:title="PDF File, 711 KB"/></supplementary-material></app-group></back></article>