<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i7e15965</article-id>
      <article-id pub-id-type="pmid">32735230</article-id>
      <article-id pub-id-type="doi">10.2196/15965</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>A Predictive Model Based on Machine Learning for the Early Detection of Late-Onset Neonatal Sepsis: Development and Observational Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shin</surname>
            <given-names>Soo-Yong</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Agakov</surname>
            <given-names>Felix</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Aminbeidokhti</surname>
            <given-names>Amirhossein</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Wongeun</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4314-3995</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Jung</surname>
            <given-names>Se Young</given-names>
          </name>
          <degrees>MPH, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9946-8807</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Baek</surname>
            <given-names>Hyunyoung</given-names>
          </name>
          <degrees>RN, MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0810-9396</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Choi</surname>
            <given-names>Chang Won</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1911-0253</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Jung</surname>
            <given-names>Young Hwa</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4159-586X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Yoo</surname>
            <given-names>Sooyoung</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Healthcare ICT Research Center</institution>
            <institution>Office of eHealth Research and Businesses</institution>
            <institution>Seoul National University Bundang Hospital</institution>
            <addr-line>172 Dolma-ro, Bundang-gu</addr-line>
            <addr-line>Seongnam-si, 13620</addr-line>
            <country>Republic of Korea</country>
            <phone>82 32 787 8980</phone>
            <email>yoosoo0@snubh.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8620-4925</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Healthcare ICT Research Center</institution>
        <institution>Office of eHealth Research and Businesses</institution>
        <institution>Seoul National University Bundang Hospital</institution>
        <addr-line>Seongnam-si</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Pediatrics</institution>
        <institution>Seoul National University Bundang Hospital</institution>
        <addr-line>Seongnam-si</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Sooyoung Yoo <email>yoosoo0@snubh.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>7</issue>
      <elocation-id>e15965</elocation-id>
      <history>
        <date date-type="received">
          <day>22</day>
          <month>8</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>16</day>
          <month>10</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>27</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>7</day>
          <month>6</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Wongeun Song, Se Young Jung, Hyunyoung Baek, Chang Won Choi, Young Hwa Jung, Sooyoung Yoo. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 31.07.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/7/e15965/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Neonatal sepsis is associated with most cases of mortalities and morbidities in the neonatal intensive care unit (NICU). Many studies have developed prediction models for the early diagnosis of bloodstream infections in newborns, but there are limitations to data collection and management because these models are based on high-resolution waveform data.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to examine the feasibility of a prediction model by using noninvasive vital sign data and machine learning technology.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used electronic medical record data in intensive care units published in the Medical Information Mart for Intensive Care III clinical database. The late-onset neonatal sepsis (LONS) prediction algorithm using our proposed forward feature selection technique was based on NICU inpatient data and was designed to detect clinical sepsis 48 hours before occurrence. The performance of this prediction model was evaluated using various feature selection algorithms and machine learning models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The performance of the LONS prediction model was found to be comparable to that of the prediction models that use invasive data such as high-resolution vital sign data, blood gas estimations, blood cell counts, and pH levels. The area under the receiver operating characteristic curve of the 48-hour prediction model was 0.861 and that of the onset detection model was 0.868. The main features that could be vital candidate markers for clinical neonatal sepsis were blood pressure, oxygen saturation, and body temperature. Feature generation using kurtosis and skewness of the features showed the highest performance.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The findings of our study confirmed that the LONS prediction model based on machine learning can be developed using vital sign data that are regularly measured in clinical settings. Future studies should conduct external validation by using different types of data sets and actual clinical verification of the developed model.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>prediction</kwd>
        <kwd>late-onset neonatal sepsis</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>With the developments in the care system of neonatal intensive care units (NICUs), the survival rates of very low birth weight infants have greatly increased. However, neonatal sepsis is still associated with most morbidities and mortalities in the NICUs, and 20% of the deaths in infants weighing &#60;1500 g have been reported to be caused by sepsis. Moreover, infants with sepsis are about three times more likely to die compared to those without sepsis [<xref ref-type="bibr" rid="ref1">1</xref>]. Neonatal sepsis is categorized into early-onset neonatal sepsis occurring within 72 hours of birth and late-onset neonatal sepsis (LONS) occurring between 72 hours and 120 days after birth [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Early-onset neonatal sepsis is caused by an in utero infection or by vertical bacterial transmission from the mother during vaginal delivery, while LONS is caused not only by vertical bacterial transmission but also by horizontal bacterial transmission from health care providers and the environment.</p>
      <p>Sepsis due to group B <italic>Streptococcus</italic>, which is the most common cause of early-onset neonatal sepsis, can be reduced by 80% before delivery, and intrapartum antibiotic prophylaxis is given when necessary. However, in the case of LONS, unlike early-onset neonatal sepsis, there is no specific antibiotic prophylaxis and there is no robust algorithm that can contribute to its early detection in nonsymptomatic newborns [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. A blood culture test is required for the confirmatory diagnosis of LONS, but it takes an average of 2-3 days to obtain blood culture results. Generally, empirical antibiotic treatments are prescribed to reduce the risk of treatment delay. Even if a negative finding is reported for blood culture, antibiotic therapy is prolonged when the clinical symptoms of LONS are manifested because of the possibility of false-negative blood culture results [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. This treatment process results in bacterial resistance, adverse effects due to prolonged antibiotic therapy, and increased medical costs.</p>
      <p>Since several studies have analyzed medical imaging data such as computed tomography and magnetic resonance imaging scans and radiographs by using deep learning and machine learning, recent studies have developed prediction models for the early diagnosis of bloodstream infections and symptomatic systemic inflammatory response syndrome in newborns [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>Griffin et al [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>] presented a method for identifying the early stage of sepsis by checking the abnormal phase of heart rate characteristics. Stanculescu et al [<xref ref-type="bibr" rid="ref12">12</xref>] applied the autoregressive hidden Markov model to physiological events such as desaturation and bradycardia in infants and predicted the occurrence of an infection by using the onset prediction model. In addition, a model was presented to make predictions by generating a machine learning model based on vital signs or laboratory features recorded in the electronic medical record (EMR) of an infant [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. However, heart rate characteristics can be affected by respiratory deterioration and surgical procedures in addition to sepsis [<xref ref-type="bibr" rid="ref15">15</xref>] and heart rate characteristics cannot be obtained in patient monitors without a heart rate characteristic index function. The existing prediction models also involved high computational cost, high-resolution data, or laboratory parameters such as complete blood cell count, immature neutrophil to total neutrophil ratio, and polymorphonuclear leukocyte counts.</p>
      <p>Studies on machine learning prediction models using EMR data have inherent problems such as high dimensionality and sparsity, data bias, and few abnormal events [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. Previous studies have tried to resolve the abovementioned problems by using several techniques such as oversampling, undersampling, data handling, and feature selection [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. However, the performance of the model that learned processed data by using data augmentation has not significantly improved compared to that of the previous prediction models, and the EMR-based prediction model is still being challenged [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Therefore, by using data from the Medical Information Mart for Intensive Care III (MIMIC-III) database [<xref ref-type="bibr" rid="ref23">23</xref>], we aimed to apply the feature selection algorithm to develop a machine learning model that reliably predicts LONS by using low sparsity and few scenarios and to examine the feasibility of the developed prediction model by using noninvasive vital sign data and machine learning technology. In addition, we sought to identify clinically significant vital signs and their corresponding feature analysis methods in LONS.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source and Target Population</title>
        <p>In this study, the MIMIC-III database [<xref ref-type="bibr" rid="ref23">23</xref>], which consisted of Beth Israel Deaconess Medical Center’s public data on admission in the intensive care unit, was used as the data source. The use of data from the MIMIC-III database for research was approved by the Institutional Review Boards of Beth Israel Deaconess Medical Center and Massachusetts Institute of Technology. NICU inpatients in the 2001-2008 MIMIC-III database were selected as the total population, and their data were extracted. The patients were assigned to sepsis and control groups. The sepsis group consisted of patients with diagnostic codes of septicemia, infections specific to the perinatal period, sepsis, septic shock, systemic inflammatory response syndrome, etc, based on the discharge report. The diagnostic record of the MIMIC-III database utilized the International Classification of Diseases, Ninth Revision, Clinical Modification codes 038 (septicemia), 771 (infections specific to the perinatal period), 995.9 (systemic inflammatory response syndrome), or 785.52 (septic shock), including the abovementioned diagnosis.</p>
      </sec>
      <sec>
        <title>Identification of the Sepsis Diagnosis Events</title>
        <p>Since the diagnosis table of the MIMIC-III database does not contain information on the timing of diagnosis, this information had to be extracted indirectly from the laboratory test order and intervention information to deduce the timing of diagnosis. Generally, positive blood culture results, clinical deterioration, and high C-reactive protein levels are considered as risk factors, and antibiotic treatment is given by aggregating the information on risk factors [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. However, in preterm infants, it is difficult to distinguish the normal conditions of the neonatal period from the clinical signs of sepsis, and since the C-reactive protein value could not be obtained from the MIMIC-III database, the timing of sepsis diagnosis was extracted based on the time of blood culture testing and antibiotic prescription. Generally, a positive blood culture result is selected as the gold standard based on the criteria used to confirm sepsis. However, since the amount of blood samples that can be collected from preterm or very low birth weight infants is very limited, the number of blood cultures was also small. Moreover, false-negative results may occur because of the low sensitivity of the blood culture, prior use of broad-spectrum antibiotics, and incubation time of the neonatal blood culture [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Therefore, the timing of sepsis diagnosis was extracted based on the time of the administration of the order of broad-spectrum antibiotics, time of antibiotic administration through intravenous routes, and the time of blood culture order. 
In the MIMIC-III database, the date on which the item of SPEC_TYPE_DESC in the MICROBIOLOGY EVENTS table was marked as BLOOD CULTURE was assigned as the date of blood culture and the date on which the DRUG was broad-spectrum antibiotics and the ROUTE was filled as IV was used as the antibiotic date in the PRESCRIPTION table.</p>
      </sec>
      <sec>
        <title>Feature Processing and Imputation</title>
        <p>In the machine learning model, the following features were selected: heart rate, systolic blood pressure, diastolic blood pressure, mean blood pressure, oxygen saturation, respiratory rate, and body temperature. In the MIMIC-III database, the vital sign and laboratory data that can be used as the candidate features of the predictive models were heart rate, systolic blood pressure, diastolic blood pressure, mean blood pressure, respiratory rate, body temperature, oxygen saturation, Glasgow Coma Scale score, white blood cell count, red blood cell count, platelet count, bilirubin level, albumin level, pH, potassium level, sodium level, creatinine level, blood urea nitrogen, glucose level, partial pressure of carbon dioxide, fraction of inspired oxygen, serum bicarbonate levels, hematocrit, tidal volume, mean airway pressure, peak airway pressure, plateau airway pressure, and Apgar score. Among them, the primary vital signs (body temperature, heart rate, respiratory rate, and blood pressure) and oxygen saturation levels were recorded periodically, whereas the utilization of the other measured values was limited because they were not recorded periodically or they were recorded only for specific patients. Therefore, body temperature, heart rate, respiratory rate, blood pressure, and oxygen saturation that can be commonly applied in predictive models were selected as the features. Moreover, these vital signs are usually accessible from the bedside, do not involve laboratory tests, and can be applied in most hospitals. However, although the current value of the vital signs can be intuitively used as input data, irregular observation cycles of the patient can increase its complexity. 
Hence, in this study, we tried to increase the accuracy of the actual physiological deterioration of the patient by additionally calculating the statistical and current values of the vital signs and comparing and evaluating the performance of the significant statistical values and observation period for each vital sign. The statistical values, vital signs, and processed observation window size used for the generation are shown in <xref ref-type="table" rid="table1">Table 1</xref>. In this study, we used statistical values, which are used by many EMR-based prediction models and time series analysis. However, Fourier transform analysis, wavelet transform, and spectrum analysis, which are mainly applied in time series, were excluded from this study because they require high-frequency and relatively periodic data to produce significant results.</p>
        <p>To test goodness of fit to the normal distribution, the Shapiro-Wilk test was used for &#60;5000 samples and the Kolmogorov-Smirnov test was used for ≥5000 samples. These normality tests were used when selecting the suitable statistical method depending on the family of distributions. For the correlation, Pearson’s correlation was used for normally distributed continuous variables; otherwise, Spearman’s correlation was used. Entropy was calculated by estimating the probability density function of the variable with Gaussian kernel density estimation if a normal distribution was not satisfied. Statistical significance was set to .05. The data quality was assessed by a missing value filter and the three-sigma rule, and the last observation carried forward method was applied for vital signs assessed as not meeting data quality. The last observation carried forward method is similar to the use of vital signs for diagnosis in general clinical practice and has been mainly used as the imputation method of missing values in clinical prediction models. When there was no measured value, zero imputation was applied to the missing value to indicate to the prediction model that the value was never measured. Zero imputation was also conducted if the calculation could not be performed for reasons such as division by zero after applying the statistical feature processing.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Experimental settings of the vital signs, statistical methods, and processed window time. h: hours.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="340"/>
            <col width="660"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>Experimental options</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Value of vital signs</td>
                <td>Heart rate, respiratory rate, oxygen saturation, systolic blood pressure, mean blood pressure, diastolic blood pressure, body temperature</td>
              </tr>
              <tr valign="top">
                <td>Statistical method of feature processing</td>
                <td>Mean, median, minimum, maximum, standard deviation, skewness, kurtosis, slope, entropy, delta, absolute delta, correlation coefficient, cross-correlation</td>
              </tr>
              <tr valign="top">
                <td>Processed observation window size</td>
                <td>3 h, 6 h, 12 h, 24 h</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Feature Selection Algorithms</title>
        <p>To increase the model’s performance and to exclude statistical feature values with low feature importance, a method that has been used and verified mainly in the existing machine learning was used. The feature selection method and algorithm were selected because of the large sparsity of data used in this study and because the coefficient was not larger than that of the typical data (<xref ref-type="table" rid="table1">Table 1</xref>). In addition, the feature selection algorithm presented in this study was applied (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Proposed feature selection algorithm.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e15965_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In this study, M is the prediction model, x is the feature derived from each vital sign, and F is the performance of the model, defined as the sum of the area under the receiver operating characteristic (ROC) curve and the average precision. In the case of the data in this study, it was difficult to measure the performance of the minor class when the incidence ratio was too low. Therefore, the classification performance of the major and minor classes for the model selection was evaluated at the same time as the sum of the average precision and area under the ROC (AUROC) curve. When the features were derived from the vital signs, it was limited to the use of only data obtained from the past observation based on the prediction time to prevent any lookahead due to future observations. In addition, to measure the performance of the proposed feature selection algorithm, we compared the methods usually used from each approach of the feature selection techniques. In the filter approach, chi-squared and mutual information gain were selected. In the embedded approach, lasso linear model L1–based feature selection, extra tree, random forest, and gradient boosting tree–based feature selection were selected. The other principal component analyses were excluded. Principal component analysis is mainly used in a high dimensional space; thus, an additional analysis of the generated features is needed. This means that the direct interpretation power is relatively low in terms of the correlation between the predicted results and the feature importance. In addition, principal component analysis has several disadvantages, such as feature transformation being possible only when all the existing features are contained and a high computational cost. Thus, principal component analysis was excluded owing to the above problems. 
To minimize the differences between the models’ coincidence and temporal characteristics, the observation window and feature processing time stamps were used equally and the model was built without data sampling.</p>
      </sec>
      <sec>
        <title>Machine Learning Algorithms</title>
        <p>For the classification algorithm of LONS prediction, logistic regression, Gaussian Naïve Bayes, decision tree, gradient boosting, adaptive boosting, bagging classifier, random forest, and multilayer perceptron were selected and assessed. These machine learning classifiers were mainly used in supervised learning methods such as linear model, naïve Bayes, decision tree, ensemble method, and neural network model. In the case of the deep learning model, the performance variation was large depending on the number of layers and the change in the learning rate, and the amount of data was not enough to train the deep learning model. Thus, the deep learning models were excluded. To evaluate the performance between the feature selection model and the proposed algorithm, 10% of the target population was used as the feature selection data set, 80% as the train set, and 10% as the test set to perform a stratified 10-fold cross-validation. Then, 100 turns of bootstrapping were applied to obtain the 95% confidence interval of the performance indicator. The model performance indicator enabled a detailed evaluation of the imbalanced data performance by using indicators such as accuracy, AUROC curve, area under the precision-recall curve (APRC), positive predictive value, negative predictive value, and the harmonic mean of precision and recall (F1 score).</p>
      </sec>
      <sec>
        <title>Data Sampling Algorithm</title>
        <p>If the data sampling algorithm is applied to model learning after labeling of the data set, a normal model learning is barely attainable because of the imbalanced and overwhelming data. In this study, undersampling algorithms, oversampling algorithms, and a combination of both oversampling and undersampling algorithms, which are data sampling algorithms, were applied to the training set, and the extent to which the model performance for EMR data set was affected was checked using a test set that was not sampled. The oversampling algorithms used were the synthetic minority oversampling technique (SMOTE) [<xref ref-type="bibr" rid="ref26">26</xref>], adaptive synthetic sampling method [<xref ref-type="bibr" rid="ref27">27</xref>], and RandomOverSampler. The undersampling algorithms used were NearMiss [<xref ref-type="bibr" rid="ref28">28</xref>], RandomUnderSampler, All-K-Nearest-Neighbors [<xref ref-type="bibr" rid="ref29">29</xref>], and InstanceHardnessThreshold [<xref ref-type="bibr" rid="ref30">30</xref>]. As for the combination of oversampling and undersampling algorithms, SMOTE + Wilson’s Edited Nearest Neighbor (SMOTEENN) rule [<xref ref-type="bibr" rid="ref31">31</xref>] and SMOTE + Tomek links [<xref ref-type="bibr" rid="ref32">32</xref>] were applied.</p>
      </sec>
      <sec>
        <title>Evaluation of the Algorithm</title>
        <p>The methods presented in <xref rid="figure2" ref-type="fig">Figure 2</xref> were introduced for the evaluation of the feature selection algorithms and prediction model. To prevent leaking of the test set, the MIMIC-III data were divided by organizing the feature selection evaluation data set at 20% and the prediction model evaluation data set at 80% by using a stratified shuffle. To avoid the overestimation in the test set due to the optimized estimator of 10-fold cross-validation, the performance of the prediction models was measured by initializing the hyperparameters at each fold. For the feature selection algorithm, performance was classified based on the Gaussian Naïve Bayesian Classifier as shown in the study by Phyu and Oo [<xref ref-type="bibr" rid="ref33">33</xref>]. Given that the classifier’s evaluation algorithm is straightforward and that the ensemble classifier such as the gradient-boosted machine can have interactions, nonlinear relationships, and automatic feature selection between features and because there is ambiguity in the statistical properties, the classifier was not selected as the base model [<xref ref-type="bibr" rid="ref34">34</xref>]. In addition, the mean, minimum, maximum, standard deviation, and median of each vital sign were designated as the baseline features and compared with models that did not perform a feature selection. The existing research model was compared to the model development algorithm presented in this study by presenting both the performance of the presented model and the performance that would have resulted if conducted using the MIMIC-III data. We used Statsmodels and NumPy libraries to analyze the statistical properties. The metric module, a Python module from scikit-learn library, was used to evaluate the classifiers.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Diagram of the evaluation process for models and algorithms. MIMIC-III: Medical Information Mart for Intensive Care; NICU: neonatal intensive care unit.</p>
          </caption>
          <graphic xlink:href="medinform_v8i7e15965_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Characteristics of the Study Population</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the population characteristics of the infants in this study. Of the 7870 infants in the MIMIC-III database, 21 infants were assigned to the clinical LONS group and 2798 infants met the inclusion criteria for the control group. Gestational age, birth weight, and length of stay were significantly different between the clinical LONS and control groups. The median (IQR) gestational age and birth weight in the clinical LONS group were 30 (27.0-34.5) weeks and 0.80 (0.71-1.07) kg, respectively, which were slightly lower than those of the control group whose median (IQR) gestational age and birth weight were 34 (33.5-34.5) weeks and 2.02 (1.58-2.53) kg, respectively. The clinical LONS group showed a significantly longer intensive care unit stay than the control group (87.9 days and 13.3 days, respectively). The male sex rate (%) showed that the male infants in both the clinical LONS and proven sepsis groups had a high risk for infection (61.9% and 51.5%, respectively).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Characteristics of the target population (N=7870).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="190"/>
            <col width="190"/>
            <col width="190"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Demographic characteristics</td>
                <td>NICU<sup>a</sup>, n=96</td>
                <td>Clinical LONS<sup>b</sup> group, n=21</td>
                <td>Proven sepsis group, n=715</td>
                <td>NICU control group, n=2798</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Gestational age (week), median (25th-75th percentile)</td>
                <td>34.5 (33.5-35.5)</td>
                <td>30 (27.0-34.5)</td>
                <td>30 (26.6-34.5)</td>
                <td>34 (33.5-34.5)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Birth weight (kg), median (25th-75th percentile)</td>
                <td>2.56 (0.36-3.27)</td>
                <td>0.80 (0.71-1.07)</td>
                <td>0.98 (0.72-1.28)</td>
                <td>2.02 (1.58-2.53)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Length of stay (day), median (25th-75th percentile)</td>
                <td>0.9 (0.1-10.0)</td>
                <td>87.9 (61.9-110.9)</td>
                <td>71.2 (42.2-107.2)</td>
                <td>13.3 (7.1-28.5)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Mortality in the hospital, n (%)</td>
                <td>64 (0.8)</td>
                <td>3 (3.1)</td>
                <td>1 (5.0)</td>
                <td>14 (0.5)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Gender, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male, 4243 (53.9)</td>
                <td>54 (56.3)</td>
                <td>13 (61.9)</td>
                <td>368 (51.5)</td>
                <td>1508 (53.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female, 3627 (46.1)</td>
                <td>42 (43.7)</td>
                <td>8 (38.1)</td>
                <td>347 (48.5)</td>
                <td>1290 (46.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Race, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White, 4764 (60.5)</td>
                <td>56 (58.3)</td>
                <td>13 (61.9)</td>
                <td>463 (64.8)</td>
                <td>1747 (62.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>African American, 865 (11.0)</td>
                <td>14 (14.6)</td>
                <td>3 (14.3)</td>
                <td>77 (10.8)</td>
                <td>301 (10.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian, 715 (9.1)</td>
                <td>2 (2.1)</td>
                <td>0 (0.0)</td>
                <td>36 (5.0)</td>
                <td>161 (5.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic, 369 (4.7)</td>
                <td>3 (3.1)</td>
                <td>1 (4.8)</td>
                <td>29 (4.1)</td>
                <td>136 (4.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other, 1157 (14.7)</td>
                <td>21 (21.9)</td>
                <td>4 (19.0)</td>
                <td>110 (15.4)</td>
                <td>453 (16.2)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Hospital admission type, n (%)<sup>c</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Newborn, 7859 (99.9)</td>
                <td>95 (99.0)</td>
                <td>21 (100.0)</td>
                <td>713 (99.7)</td>
                <td>2787 (96.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Emergency, 220 (2.8)</td>
                <td>22 (22.9)</td>
                <td>7 (33.3)</td>
                <td>9 (1.3)</td>
                <td>87 (3.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Urgent, 23 (0.3)</td>
                <td>0 (0.0)</td>
                <td>0 (0.0)</td>
                <td>1 (0.1)</td>
                <td>16 (0.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Elective, 4 (0.1)</td>
                <td>0 (0.0)</td>
                <td>0 (0.0)</td>
                <td>0 (0.0)</td>
                <td>2 (0.1)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>NICU: neonatal intensive care unit.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>LONS: late-onset neonatal sepsis.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>Duplicate admission types were allowed.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Performance of the Feature Selection Algorithm</title>
        <p>The performances of the proposed feature selection algorithm and the existing feature selection algorithm were compared after 100 turns of bootstrapping; the measured performance by the algorithm is shown in <xref ref-type="table" rid="table3">Table 3</xref>. Given that the AUROC and accuracy rate are likely to be overestimated in the imbalanced data such as this study’s data, performance was evaluated based on the APRC and F1 measure, which can evaluate the classification performance for major and minor classes. If the window size is 6 hours, the accuracy of the chi-squared feature selection was the highest at 0.60. The extra tree–based feature selection showed a higher performance with AUROC of 0.79, APRC of 0.23, and F1 score of 0.21. When the goal window size was set at 12 hours, the chi-squared (accuracy 0.68, positive predictive value 0.18), extra tree (APRC 0.24), and the proposed algorithm (AUROC 0.79, F1 score 0.25, and weighted-F1 0.65) showed a higher performance than the baseline. However, the feature selection of the mutual information gain and lasso L1 penalty classification was still lower than the performance of the baseline model. In a 24-hour window, the proposed algorithm displayed an overall high performance with AUROC of 0.81 (0.81-0.82), APRC of 0.24 (0.23-0.25), and F1 score of 0.33 (0.32-0.34). When the confidence interval was evaluated, a uniform performance was displayed despite the variations caused by the sample. Overall, as the duration of the observation window increased, the model receiving the features consisting of statistical values as input had improved performance compared to the baseline feature model. The lasso L1 penalty classification model, which is a univariate method, shows the highest indicator with an accuracy of 0.90. However, an AUROC of 0.69 and F1 score of 0.05 indicate that a feature that can barely distinguish normal from suspected infection conditions was selected. 
The wrapper method feature selection, which was expected to show a high performance, showed a lower performance than the baseline feature model when the observation window was 6 hours. When the observation time was increased to 12 or 24 hours, the extra tree feature selection showed a high performance. However, as the confidence interval appears wider, the robustness based on the sample population changes is lower than that of the other feature selection algorithms. In particular, the feature selection of the feature importance in the random forest and gradient boosting classifier showed an AUROC of 0.56-0.62 and 0.69-0.75, respectively, at 12 hours, and with the 24-hour window, it showed a wide range of confidence intervals at 0.72-0.79 and 0.75-0.81, respectively.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison results for various feature selection algorithms<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="100"/>
            <col width="0"/>
            <col width="130"/>
            <col width="0"/>
            <col width="130"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="130"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Window size and algorithm</td>
                <td colspan="2">Accuracy<sup>b</sup>, odds ratio (95% CI)</td>
                <td colspan="2">AUROC<sup>c</sup>, odds ratio (95% CI)</td>
                <td colspan="2">APRC<sup>d</sup>, odds ratio (95% CI)</td>
                <td colspan="2">F1<sup>e</sup>, odds ratio (95% CI)</td>
                <td colspan="2">Weighted-F1<sup>f</sup>, odds ratio (95% CI)</td>
                <td colspan="2">PPV<sup>g</sup>, odds ratio (95% CI)</td>
                <td colspan="2">NPV<sup>h</sup>, odds ratio (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="16">
                  <bold>24 hours</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Proposed</td>
                <td colspan="2">0.76 (0.75-0.78)</td>
                <td colspan="2">
                  <italic>0.81 (0.80-0.81)</italic>
                </td>
                <td colspan="2">
                  <italic>0.31 (0.31-0.32)</italic>
                </td>
                <td colspan="2">
                  <italic>0.39 (0.38-0.40)</italic>
                </td>
                <td colspan="2">0.80 (0.79-0.81)</td>
                <td colspan="2">0.28 (0.27-0.29)</td>
                <td>0.95 (0.95-0.96)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CS<sup>i</sup></td>
                <td colspan="2">
                  <italic>0.83 (0.81-0.84)</italic>
                </td>
                <td colspan="2">0.77 (0.76-0.77)</td>
                <td colspan="2">0.28 (0.27-0.29)</td>
                <td colspan="2">0.34 (0.34-0.35)</td>
                <td colspan="2">
                  <italic>0.83 (0.82-0.85)</italic>
                </td>
                <td colspan="2">
                  <italic>0.30 (0.29-0.31)</italic>
                </td>
                <td>0.92 (0.92-0.93)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">MIG<sup>j</sup></td>
                <td colspan="2">0.15 (0.13-0.17)</td>
                <td colspan="2">0.53 (0.51-0.54)</td>
                <td colspan="2">0.12 (0.12-0.13)</td>
                <td colspan="2">0.20 (0.20-0.21)</td>
                <td colspan="2">0.08 (0.06-0.11)</td>
                <td colspan="2">0.11 (0.11-0.12)</td>
                <td>
                  <italic>0.99 (0.98-0.99)</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">LL1<sup>k</sup></td>
                <td colspan="2">0.27 (0.23-0.31)</td>
                <td colspan="2">0.54 (0.52-0.55)</td>
                <td colspan="2">0.12 (0.12-0.13)</td>
                <td colspan="2">0.22 (0.21-0.22)</td>
                <td colspan="2">0.26 (0.21-0.30)</td>
                <td colspan="2">0.13 (0.12-0.13)</td>
                <td>0.93 (0.93-0.94)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">ET<sup>l</sup></td>
                <td colspan="2">0.64 (0.60-0.68)</td>
                <td colspan="2">0.79 (0.77-0.81)</td>
                <td colspan="2">0.31 (0.29-0.32)</td>
                <td colspan="2">0.36 (0.35-0.37)</td>
                <td colspan="2">0.68 (0.64-0.73)</td>
                <td colspan="2">0.24 (0.23-0.26)</td>
                <td>0.97 (0.96-0.97)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">RF<sup>m</sup></td>
                <td colspan="2">0.31 (0.27-0.36)</td>
                <td colspan="2">0.65 (0.61-0.68)</td>
                <td colspan="2">0.20 (0.18-0.23)</td>
                <td colspan="2">0.25 (0.24-0.26)</td>
                <td colspan="2">0.30 (0.25-0.36)</td>
                <td colspan="2">0.15 (0.14-0.16)</td>
                <td>0.98 (0.98-0.98)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">GB<sup>n</sup></td>
                <td colspan="2">0.49 (0.44-0.54)</td>
                <td colspan="2">0.72 (0.70-0.75)</td>
                <td colspan="2">0.25 (0.23-0.27)</td>
                <td colspan="2">0.30 (0.29-0.32)</td>
                <td colspan="2">0.51 (0.45-0.57)</td>
                <td colspan="2">0.19 (0.18-0.21)</td>
                <td>0.97 (0.97-0.98)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Baseline</td>
                <td colspan="2">0.56 (0.53-0.58)</td>
                <td colspan="2">0.77 (0.77-0.77)</td>
                <td colspan="2">0.27 (0.26-0.28)</td>
                <td colspan="2">0.30 (0.29-0.31)</td>
                <td colspan="2">0.62 (0.59-0.65)</td>
                <td colspan="2">0.19 (0.18-0.19)</td>
                <td>0.96 (0.96-0.97)</td>
              </tr>
              <tr valign="top">
                <td colspan="16">
                  <bold>12 hours</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Proposed</td>
                <td colspan="2">0.65 (0.62-0.68)</td>
                <td colspan="2">0.75 (0.75-0.76)</td>
                <td colspan="2">0.25 (0.24-0.25)</td>
                <td colspan="2">
                  <italic>0.31 (0.30-0.32)</italic>
                </td>
                <td colspan="2">0.70 (0.67-0.73)</td>
                <td colspan="2">0.21 (0.20-0.22)</td>
                <td>0.95 (0.95-0.95)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CS</td>
                <td colspan="2">
                  <italic>0.77 (0.75-0.78)</italic>
                </td>
                <td colspan="2">0.72 (0.71-0.72)</td>
                <td colspan="2">0.22 (0.22-0.23)</td>
                <td colspan="2">0.30 (0.29-0.30)</td>
                <td colspan="2">
                  <italic>0.79 (0.78-0.81)</italic>
                </td>
                <td colspan="2">
                  <italic>0.23 (0.22-0.23)</italic>
                </td>
                <td>0.93 (0.92-0.93)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">MIG</td>
                <td colspan="2">0.17 (0.15-0.19)</td>
                <td colspan="2">0.58 (0.56-0.60)</td>
                <td colspan="2">0.15 (0.14-0.16)</td>
                <td colspan="2">0.21 (0.20-0.21)</td>
                <td colspan="2">0.13 (0.10-0.16)</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td>0.98 (0.97-0.98)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">LL1</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td colspan="2">0.50 (0.50-0.50)</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td colspan="2">0.20 (0.19-0.20)</td>
                <td colspan="2">0.02 (0.02-0.02)</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td>
                  <italic>1.00 (1.00-1.00)</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">ET</td>
                <td colspan="2">0.48 (0.44-0.51)</td>
                <td colspan="2">
                  <italic>0.78 (0.77-0.80)</italic>
                </td>
                <td colspan="2">
                  <italic>0.29 (0.28-0.30)</italic>
                </td>
                <td colspan="2">0.29 (0.27-0.30)</td>
                <td colspan="2">0.53 (0.49-0.57)</td>
                <td colspan="2">0.17 (0.16-0.19)</td>
                <td>0.97 (0.97-0.98)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">RF</td>
                <td colspan="2">0.25 (0.21-0.29)</td>
                <td colspan="2">0.61 (0.58-0.64)</td>
                <td colspan="2">0.18 (0.16-0.19)</td>
                <td colspan="2">0.23 (0.22-0.24)</td>
                <td colspan="2">0.22 (0.17-0.27)</td>
                <td colspan="2">0.13 (0.12-0.14)</td>
                <td>0.99 (0.99-0.99)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">GB</td>
                <td colspan="2">0.41 (0.37-0.45)</td>
                <td colspan="2">0.74 (0.72-0.76)</td>
                <td colspan="2">0.24 (0.23-0.26)</td>
                <td colspan="2">0.27 (0.25-0.27)</td>
                <td colspan="2">0.45 (0.40-0.50)</td>
                <td colspan="2">0.16 (0.15-0.17)</td>
                <td>0.97 (0.97-0.98)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Baseline</td>
                <td colspan="2">0.62 (0.59-0.66)</td>
                <td colspan="2">0.73 (0.73-0.74)</td>
                <td colspan="2">0.23 (0.23-0.24)</td>
                <td colspan="2">0.30 (0.30-0.31)</td>
                <td colspan="2">0.67 (0.63-0.71)</td>
                <td colspan="2">0.20 (0.19-0.21)</td>
                <td>0.95 (0.95-0.95)</td>
              </tr>
              <tr valign="top">
                <td colspan="16">
                  <bold>6 hours</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Proposed</td>
                <td colspan="2">0.44 (0.40-0.47)</td>
                <td colspan="2">0.70 (0.70-0.71)</td>
                <td colspan="2">0.20 (0.20-0.21)</td>
                <td colspan="2">0.25 (0.24-0.25)</td>
                <td colspan="2">0.49 (0.45-0.52)</td>
                <td colspan="2">0.15 (0.14-0.16)</td>
                <td>0.96 (0.95-0.96)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CS</td>
                <td colspan="2">0.56 (0.52-0.61)</td>
                <td colspan="2">0.67 (0.65-0.68)</td>
                <td colspan="2">0.18 (0.17-0.19)</td>
                <td colspan="2">0.25 (0.24-0.26)</td>
                <td colspan="2">0.60 (0.56-0.65)</td>
                <td colspan="2">0.16 (0.16-0.17)</td>
                <td>0.93 (0.93-0.94)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">MIG</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td colspan="2">0.50 (0.50-0.50)</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td colspan="2">0.19 (0.19-0.20)</td>
                <td colspan="2">0.03 (0.02-0.03)</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td>0.99 (0.98-1.00)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">LL1</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td colspan="2">0.50 (0.50-0.50)</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td colspan="2">0.19 (0.19-0.20)</td>
                <td colspan="2">0.02 (0.02-0.02)</td>
                <td colspan="2">0.11 (0.11-0.11)</td>
                <td>
                  <italic>1.00 (1.00-1.00)</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">ET</td>
                <td colspan="2">0.46 (0.41-0.50)</td>
                <td colspan="2">0.71 (0.69-0.74)</td>
                <td colspan="2">
                  <italic>0.23 (0.22-0.24)</italic>
                </td>
                <td colspan="2">0.28 (0.26-0.29)</td>
                <td colspan="2">0.49 (0.43-0.54)</td>
                <td colspan="2">0.17 (0.16-0.18)</td>
                <td>0.97 (0.96-0.97)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">RF</td>
                <td colspan="2">0.30 (0.25-0.34)</td>
                <td colspan="2">0.61 (0.59-0.64)</td>
                <td colspan="2">0.17 (0.15-0.18)</td>
                <td colspan="2">0.17 (0.15-0.18)</td>
                <td colspan="2">0.22 (0.21-0.23)</td>
                <td colspan="2">
                  <italic>0.28 (0.22-0.34)</italic>
                </td>
                <td>0.97 (0.96-0.98)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">GB</td>
                <td colspan="2">0.37 (0.32-0.42)</td>
                <td colspan="2">0.66 (0.63-0.69)</td>
                <td colspan="2">0.19 (0.18-0.21)</td>
                <td colspan="2">0.25 (0.24-0.26)</td>
                <td colspan="2">0.38 (0.32-0.44)</td>
                <td colspan="2">0.15 (0.14-0.16)</td>
                <td>0.96 (0.95-0.97)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Baseline</td>
                <td colspan="2">
                  <italic>0.60 (0.56-0.63)</italic>
                </td>
                <td colspan="2">
                  <italic>0.72 (0.71-0.72)</italic>
                </td>
                <td colspan="2">0.21 (0.21-0.22)</td>
                <td colspan="2">
                  <italic>0.29 (0.21-0.22)</italic>
                </td>
                <td colspan="2">
                  <italic>0.65 (0.61-0.69)</italic>
                </td>
                <td colspan="2">0.18 (0.18-0.19)</td>
                <td>0.95 (0.95-0.95)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>The highest score in each column is shown in italics.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Accuracy: (true positive + true negative) / (positive + negative).</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>AUROC: area under the receiver operating characteristic.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>APRC: area under the precision recall curve.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>F1: harmonic mean of precision and recall.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>Weighted-F1: macro F1 measurement.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>PPV: positive predictive value.</p>
            </fn>
            <fn id="table3fn8">
              <p><sup>h</sup>NPV: negative predictive value.</p>
            </fn>
            <fn id="table3fn9">
              <p><sup>i</sup>CS: chi-square test.</p>
            </fn>
            <fn id="table3fn10">
              <p><sup>j</sup>MIG: mutual information gain.</p>
            </fn>
            <fn id="table3fn11">
              <p><sup>k</sup>LL1: lasso L1 penalty classification.</p>
            </fn>
            <fn id="table3fn12">
              <p><sup>l</sup>ET: extra tree.</p>
            </fn>
            <fn id="table3fn13">
              <p><sup>m</sup>RF: random forest.</p>
            </fn>
            <fn id="table3fn14">
              <p><sup>n</sup>GB: gradient boosting.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Performance of Data Sampling</title>
        <p>Data sampling was measured by fixing the observation time to 24 hours, applying sampling only on training data using the Gaussian Naïve Bayesian classifier and performing stratified 10-fold cross-validation. The results of the accuracy analysis showed that the adaptive synthetic sampling method, All-K-Nearest-Neighbors, InstanceHardnessThreshold, and SMOTEENN performed better than the average value of 0.7, which exceeds the 0.579 of the original data. For AUROC and APRC, all sampling methods, except SMOTEENN, showed a lower or similar performance compared to the original data. In the F1 score, SMOTEENN and InstanceHardnessThreshold had a higher performance than the original data.</p>
      </sec>
      <sec>
        <title>Characteristics of the Selected Features</title>
        <p>The features obtained from the proposed feature selection method are shown in <xref ref-type="table" rid="table4">Table 4</xref>. Clinicians might be provided with clinical information on selected features through plots in the form of <xref ref-type="table" rid="table5">Table 5</xref> and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. <xref ref-type="table" rid="table5">Table 5</xref> represents the feature importance of the onset after 24 hours calculated by the prediction model learned based on the values of the selected features. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provides information on how the prediction model made decisions. Three features were selected among the features mainly selected for each vital sign, and the difference of the latent feature selected based on the window size was confirmed. For the 24-hour window size, the delta between the current and previous measurements was the main variable for all the vital signs. Of these, the kurtosis of the respiratory rate, kurtosis of the body temperature, standard oxygen saturation, and the delta of blood pressures were extracted similarly to the significant feature of the septic shock prediction model [<xref ref-type="bibr" rid="ref35">35</xref>] for adult patients in the MIMIC-III database, as presented by Carrara et al. As the window size decreased, the data characteristics of the features shifted in importance to mean, entropy, and entropy of delta. This is probably because, in newborns with suspected infection, the frequency of the records increased within the same period such that it affected the entropy increase and was selected as the main variable. 
When the <italic>P</italic> value of the feature was analyzed using multivariate logistic regression and by focusing on the infection and noninfection points of the statistically significant variables, the oxygen saturation showed desaturation symptoms and wide oxygen saturation changes at the infection point. For the heart rate, tachycardia symptoms were observed at the point of infection. For the body temperature, a delta kurtosis showed a lower expected infection point. Unstable temperature, bradycardia, tachycardia, and hypotension, which are the clinical signs of LONS, were measured [<xref ref-type="bibr" rid="ref25">25</xref>]. The statistical variable was found to have a lower or similar performance compared to the baseline model for the 12-hour window size. This shows that at least 12 hours of accumulated vital signs must be statistically analyzed so that they can be used as significant physiomarkers.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Selected features from the proposed feature selection algorithm.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="330"/>
            <col width="640"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Vital signs and prediction window size</td>
                <td>Statistical method of feature processing</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Heart rate</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>24 hours</td>
                <td>Mean, median absolute delta, minimum absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>12 hours</td>
                <td>Mean, minimum absolute delta, median absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>6 hours</td>
                <td>Mean, entropy delta, entropy</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Respiratory rate</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>24 hours</td>
                <td>Mean, median absolute delta, kurtosis absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>12 hours</td>
                <td>Mean, entropy delta, minimum absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>6 hours</td>
                <td>Mean, entropy absolute delta, entropy delta</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Oxygen saturation</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>24 hours</td>
                <td>Mean, standard deviation delta, maximum absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>12 hours</td>
                <td>Mean, maximum absolute delta, standard deviation delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>6 hours</td>
                <td>Mean, entropy delta, entropy absolute delta</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Diastolic blood pressure</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>24 hours</td>
                <td>Mean, maximum absolute delta, maximum delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>12 hours</td>
                <td>Mean, kurtosis delta, kurtosis absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>6 hours</td>
                <td>Mean, entropy delta, entropy absolute delta</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Mean blood pressure</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>24 hours</td>
                <td>Mean, maximum absolute delta, maximum delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>12 hours</td>
                <td>Mean, maximum absolute delta, kurtosis delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>6 hours</td>
                <td>Mean, entropy delta, entropy absolute delta</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Systolic blood pressure</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>24 hours</td>
                <td>Mean, maximum absolute delta, maximum delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>12 hours</td>
                <td>Mean, kurtosis delta, kurtosis absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>6 hours</td>
                <td>Mean, entropy absolute delta, entropy delta</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Body temperature</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>24 hours</td>
                <td>Mean, kurtosis delta, mean absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>12 hours</td>
                <td>Mean, entropy delta, entropy absolute delta</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>6 hours</td>
                <td>Mean, entropy delta, entropy absolute delta</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>An example of the prediction feature importance obtained from the prediction model based on the feature selection algorithm.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="280"/>
            <col width="360"/>
            <col width="360"/>
            <thead>
              <tr valign="top">
                <td>Vital signs</td>
                <td>Statistical method of feature processing</td>
                <td>Feature importance values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Body temperature</td>
                <td>Mean</td>
                <td>0.282</td>
              </tr>
              <tr valign="top">
                <td>Oxygen saturation</td>
                <td>Mean</td>
                <td>0.133</td>
              </tr>
              <tr valign="top">
                <td>Oxygen saturation</td>
                <td>Standard deviation delta</td>
                <td>0.126</td>
              </tr>
              <tr valign="top">
                <td>Heart rate</td>
                <td>Mean</td>
                <td>0.106</td>
              </tr>
              <tr valign="top">
                <td>Body temperature</td>
                <td>Mean absolute delta</td>
                <td>0.052</td>
              </tr>
              <tr valign="top">
                <td>Heart rate</td>
                <td>Median absolute delta</td>
                <td>0.046</td>
              </tr>
              <tr valign="top">
                <td>Respiratory rate</td>
                <td>Mean</td>
                <td>0.042</td>
              </tr>
              <tr valign="top">
                <td>Mean blood pressure</td>
                <td>Mean</td>
                <td>0.032</td>
              </tr>
              <tr valign="top">
                <td>Body temperature</td>
                <td>Kurtosis delta</td>
                <td>0.022</td>
              </tr>
              <tr valign="top">
                <td>Mean blood pressure</td>
                <td>Maximum absolute delta</td>
                <td>0.022</td>
              </tr>
              <tr valign="top">
                <td>Diastolic blood pressure</td>
                <td>Maximum absolute delta</td>
                <td>0.019</td>
              </tr>
              <tr valign="top">
                <td>Mean blood pressure</td>
                <td>Maximum delta</td>
                <td>0.018</td>
              </tr>
              <tr valign="top">
                <td>Respiratory rate</td>
                <td>Kurtosis absolute delta</td>
                <td>0.017</td>
              </tr>
              <tr valign="top">
                <td>Systolic blood pressure</td>
                <td>Maximum absolute delta</td>
                <td>0.016</td>
              </tr>
              <tr valign="top">
                <td>Diastolic blood pressure</td>
                <td>Mean</td>
                <td>0.013</td>
              </tr>
              <tr valign="top">
                <td>Systolic blood pressure</td>
                <td>Mean</td>
                <td>0.013</td>
              </tr>
              <tr valign="top">
                <td>Respiratory rate</td>
                <td>Median absolute delta</td>
                <td>0.011</td>
              </tr>
              <tr valign="top">
                <td>Oxygen saturation</td>
                <td>Maximum absolute delta</td>
                <td>0.010</td>
              </tr>
              <tr valign="top">
                <td>Diastolic blood pressure</td>
                <td>Maximum delta</td>
                <td>0.009</td>
              </tr>
              <tr valign="top">
                <td>Systolic blood pressure</td>
                <td>Maximum delta</td>
                <td>0.006</td>
              </tr>
              <tr valign="top">
                <td>Heart rate</td>
                <td>Minimum absolute delta</td>
                <td>0.004</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Performance of the Prediction Model</title>
        <p>The models presented in this study and those developed in a previous study are shown in <xref ref-type="table" rid="table6">Table 6</xref>. The following 2 model types were developed based on the onset point: a prediction model that predicts LONS occurrence 48 hours earlier and a detection model that discovers LONS at the time of measurement. The overall performance of the presented model was higher than that of the model presented in previous studies [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Compared with the NICU sepsis prediction model of MIMIC-III, which has the same data source, the model developed in this study showed a high performance despite the relatively large number of patients. When comparing the model performance, the gradient boosting of the boost type linking multiple weak estimators showed an AUROC of 0.881, APRC of 0.536, and F1 score of 0.625 for the prediction model, while the detection model showed a high performance at an AUROC of 0.877, APRC of 0.567, and F1 score of 0.653. The logistic regression and multilayer perceptron with L2 penalty showed an AUROC of 0.874 and 0.860, APRC of 0.558 and 0.496, and F1 scores of 0.593 and 0.542, respectively, for the prediction model, whereas the detection model showed AUROC of 0.874 and 0.860, APRC of 0.558 and 0.534, and F1 scores of 0.615 and 0.595, respectively, which showed an overall higher performance than the existing LONS prediction models.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Performance results of the prediction models (microaverage).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="240"/>
            <col width="110"/>
            <col width="110"/>
            <col width="90"/>
            <col width="80"/>
            <col width="70"/>
            <col width="130"/>
            <col width="70"/>
            <col width="70"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Model (Validation data source)</td>
                <td>Forecast (h)</td>
                <td>Accuracy<sup>a</sup></td>
                <td>AUROC<sup>b</sup></td>
                <td>APRC<sup>c</sup></td>
                <td>F1<sup>d</sup></td>
                <td>Weighted-F1<sup>e</sup></td>
                <td>PPV<sup>f</sup></td>
                <td>NPV<sup>g</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="10">
                  <bold>Proposed optimization algorithm LONS<sup>h</sup> prediction model (MIMIC-III)<sup>i</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Logistic regression</td>
                <td>48</td>
                <td>0.812</td>
                <td>0.861</td>
                <td>0.446</td>
                <td>0.522</td>
                <td>0.835</td>
                <td>0.395</td>
                <td>0.958</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gaussian Naïve Bayes</td>
                <td>48</td>
                <td>0.694</td>
                <td>0.821</td>
                <td>0.394</td>
                <td>0.424</td>
                <td>0.743</td>
                <td>0.283</td>
                <td>0.964</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Decision tree classifier</td>
                <td>48</td>
                <td>0.811</td>
                <td>0.841</td>
                <td>0.449</td>
                <td>0.504</td>
                <td>0.833</td>
                <td>0.389</td>
                <td>0.950</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Extra tree classifier</td>
                <td>48</td>
                <td>0.867</td>
                <td>0.803</td>
                <td>0.367</td>
                <td>0.131</td>
                <td>0.822</td>
                <td>0.527</td>
                <td>0.874</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bagging classifier</td>
                <td>48</td>
                <td>0.863</td>
                <td>0.771</td>
                <td>0.335</td>
                <td>0.251</td>
                <td>0.835</td>
                <td>0.469</td>
                <td>0.883</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest classifier</td>
                <td>48</td>
                <td>0.867</td>
                <td>0.805</td>
                <td>0.371</td>
                <td>0.205</td>
                <td>0.831</td>
                <td>0.514</td>
                <td>0.879</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AdaBoost<sup>j</sup> classifier</td>
                <td>48</td>
                <td>0.825</td>
                <td>0.831</td>
                <td>0.421</td>
                <td>0.507</td>
                <td>0.842</td>
                <td>0.407</td>
                <td>0.944</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gradient boosting classifier</td>
                <td>48</td>
                <td>0.845</td>
                <td>0.859</td>
                <td>0.462</td>
                <td>0.522</td>
                <td>0.856</td>
                <td>0.445</td>
                <td>0.939</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Multilayer perceptron classifier</td>
                <td>48</td>
                <td>0.811</td>
                <td>0.841</td>
                <td>0.449</td>
                <td>0.504</td>
                <td>0.833</td>
                <td>0.389</td>
                <td>0.950</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Proposed optimization algorithm detection model (MIMIC-III)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Logistic regression</td>
                <td>0-48</td>
                <td>0.798</td>
                <td>0.862</td>
                <td>0.568</td>
                <td>0.619</td>
                <td>0.814</td>
                <td>0.501</td>
                <td>0.943</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gaussian Naïve Bayes</td>
                <td>0-48</td>
                <td>0.690</td>
                <td>0.806</td>
                <td>0.492</td>
                <td>0.523</td>
                <td>0.720</td>
                <td>0.380</td>
                <td>0.942</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Decision tree classifier</td>
                <td>0-48</td>
                <td>0.812</td>
                <td>0.614</td>
                <td>0.306</td>
                <td>0.376</td>
                <td>0.786</td>
                <td>0.572</td>
                <td>0.839</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Extra tree classifier</td>
                <td>0-48</td>
                <td>0.809</td>
                <td>0.794</td>
                <td>0.491</td>
                <td>0.180</td>
                <td>0.748</td>
                <td>0.683</td>
                <td>0.813</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bagging classifier</td>
                <td>0-48</td>
                <td>0.812</td>
                <td>0.774</td>
                <td>0.461</td>
                <td>0.327</td>
                <td>0.777</td>
                <td>0.592</td>
                <td>0.831</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest classifier</td>
                <td>0-48</td>
                <td>0.817</td>
                <td>0.825</td>
                <td>0.513</td>
                <td>0.302</td>
                <td>0.775</td>
                <td>0.656</td>
                <td>0.827</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AdaBoost classifier</td>
                <td>0-48</td>
                <td>0.813</td>
                <td>0.835</td>
                <td>0.513</td>
                <td>0.598</td>
                <td>0.822</td>
                <td>0.529</td>
                <td>0.914</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gradient boosting classifier</td>
                <td>0-48</td>
                <td>0.830</td>
                <td>0.868</td>
                <td>0.592</td>
                <td>0.624</td>
                <td>0.836</td>
                <td>0.563</td>
                <td>0.919</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Multilayer perceptron classifier</td>
                <td>0-48</td>
                <td>0.799</td>
                <td>0.849</td>
                <td>0.558</td>
                <td>0.611</td>
                <td>0.813</td>
                <td>0.502</td>
                <td>0.935</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>Accuracy: (true positive + true negative) / (positive + negative).</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>AUROC: area under the receiver operating characteristic.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>APRC: area under the precision recall curve.</p>
            </fn>
            <fn id="table6fn4">
              <p><sup>d</sup>F1: harmonic mean of precision and recall.</p>
            </fn>
            <fn id="table6fn5">
              <p><sup>e</sup>Weighted-F1: macro-F1 measurement.</p>
            </fn>
            <fn id="table6fn6">
              <p><sup>f</sup>PPV: positive predictive value.</p>
            </fn>
            <fn id="table6fn7">
              <p><sup>g</sup>NPV: negative predictive value.</p>
            </fn>
            <fn id="table6fn8">
              <p><sup>h</sup>LONS: late-onset neonatal sepsis.</p>
            </fn>
            <fn id="table6fn9">
              <p><sup>i</sup>MIMIC-III: Medical Information Mart for Intensive Care III.</p>
            </fn>
            <fn id="table6fn10">
              <p><sup>j</sup>AdaBoost: adaptive boosting.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>This study showed that when the biosignals recorded in EMR are used to select and learn features based on the presented algorithm, it is possible to produce a model that can predict LONS 48 hours earlier. Our model also showed a higher or similar performance to the high-resolution model of previous studies. The vital sign–based prediction model, which was based on EMR, showed a model performance that exceeded the model that learned based on the laboratory test, which was presented by Mani et al [<xref ref-type="bibr" rid="ref14">14</xref>]. When compared with the same classifier, the ROC of the prediction model with our random forest algorithm was 0.805, whereas that of the random forest using the laboratory tests of Mani et al [<xref ref-type="bibr" rid="ref14">14</xref>] was 0.650, with the vital sign–based learning model showing higher performance. Stanculescu et al’s [<xref ref-type="bibr" rid="ref12">12</xref>] autoregressive hidden Markov model showed an F1 score of 0.690 and APRC of 0.63, which showed higher performance compared to the vital sign–based prediction model that was based on EMR in this study. However, when compared to the detection model, our vital sign–based prediction model that was based on EMR showed a high overall performance. Even if the ROC of the heart rate characteristics was 0.72-0.77, the vital sign–based prediction model recorded in EMR has a higher predictive accuracy than the electrocardiogram-based presentation model [<xref ref-type="bibr" rid="ref10">10</xref>]. The presented model is expected to show a high contribution even in environments where high-resolution biometric data cannot be collected or where blood culture and laboratory tests cannot be performed regularly. The feature selection presented in this study showed a robust performance compared to the wrapper and embedded method feature selections, which are mainly used in the existing machine learning. 
Through the selected feature, the main physiomarker can be extracted conversely from EMR. In particular, for preterm infants whose definitions for the normal range of vital signs are insufficient, statistical variables such as biosignal delta and kurtosis over 24 hours can be used as a basis for classifying a patient’s condition. Blood pressure was not used as a key indicator because of the different patient criteria, but it can be used as a major feature by using statistical processing. Moreover, the contribution of the respiratory rate, which was expected to be a key indicator, was low. This is probably because there was a slight change in the respiratory rate of the infants owing to the intervention and ventilation procedures. The correlation coefficient and cross-correlation, which were expected to be important, showed low predictability in low-resolution EMR data. However, they are expected to yield significant results with a high-resolution data set. The vital sign–based prediction model developed in this study has low interpretability, similar to the deep learning and machine learning prediction models in previous studies [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. However, the feature selection presented in this study shows a high performance in linear classifiers such as logistic regression and shows no significant change in performance in other classifiers. If we take advantage of this, applying the feature selection to models such as the fully connected conditional random field and Bayesian inference that have high interpretability can solve the abovementioned problem. Given that the selected feature has dozens of feature spaces, compared to the hundreds of feature spaces in the previous models, simply looking at the model’s input variable will have sufficiently high interpretability.</p>
      <p>This study has the following limitations. First, external validation is required because the training and test data sets were created within the MIMIC-III database. N-fold cross validation was performed to reduce data bias as much as possible, but the results may vary depending on the clinician’s recording cycle, pattern, and policy. Therefore, further research is required to determine whether the model generated by the algorithm is equally applicable to the other EMR databases. Second, a limitation of the data extraction was that the prediction model was generated only with noninvasive signs. This was because the number of noninvasive measurements was relatively higher than that of invasive measurements and thus was extracted from most patients. However, an invasive measurement method has the advantage of providing an accurate measurement value; thus, it is performed for patients requiring intensive observation. In the future, it is necessary to study whether there is an improvement in performance when the invasive measurement method is applied to the prediction model of this study. Third, infants without infection may have been included in this study or the timing of sepsis onset might not have been recorded correctly. In clinical practice, empirical antibiotic treatment may be administered to noninfected infants with symptoms of sepsis to reduce mortality. Therefore, there is a limitation that false-positive sepsis can occur. In addition, since the MIMIC-III database covers the period from 2001 to 2008, the data may differ by patient population, treatment, and sepsis definition. Fourth, in the vital sign–based prediction model developed in this study, only multilayer perceptron was applied as a deep learning model. 
In addition, the performance presented in this study is likely to be lower than the maximum performance that can be modeled because the vital sign–based prediction model that was based on EMR developed in this study is a default model with no hyperparameter tuning. Therefore, advanced deep learning models should be applied to develop sophisticated and accurate prediction models in future studies. Lastly, our model could not be compared with the risk score model and the medical guidelines used in clinical practice. In clinical practice, the results of the hematology tests such as complete blood cell count, immature neutrophil to total neutrophil ratio, and polymorphonuclear leukocyte counts are mainly applied. In the MIMIC-III database used in this study, there was not enough data to record the results of the hematology test as a score model, which makes it difficult to directly compare the performance with the prediction model of the study. Further, ethnicity, gender, and immaturity might affect the outcomes since each factor affects the incidence of sepsis. Previous studies have shown that low birth weight and male gender as risk factors of infection could affect the probability of bloodstream infection. Ethnicity did not seem to directly affect the incidence of sepsis, but the sepsis incidence is different according to the community income level. Therefore, if the aforementioned characteristics of the infants are different from the population of this study, then there is a possibility of obtaining different results. Moreover, the MIMIC-III database lacks the number of infant samples that can be configured for each condition, and it is difficult to show the difference in the results. Nevertheless, acceptable results will be obtained again if the proposed algorithm is reperformed for a specific population. 
In addition, although the gene type was not recorded in the MIMIC-III database and could not be included, research on gene types should be conducted in the future. If the vital sign–based prediction model that was based on EMR developed in this study is applied to clinical sites, patients with a high LONS risk can be identified up to 48 hours in advance with high accuracy based on the nonregular charts. This could be the basis for triage of patients with a high LONS risk. Combining the predicted results of this algorithm with vital signs traditionally used in clinical sites and test results will help clinicians reach an augmented decision.</p>
      <p>In conclusion, we developed a prediction model after generating a key feature with feature selection presented in the EMR data. By doing so, a vital sign–based prediction model that was based on EMR achieved a high prediction performance and robustness compared to the previous feature selection. This research model is expected to significantly reduce the mortality of patients with LONS, and sophisticated predictions can be made through the deep learning model and model optimization. However, the limitations of data extraction and the need to construct a data collection environment remain as the major challenges in applying predictive models in clinical practice. Thus, further research is needed to address these problems.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>An example of the decision tree graph for the decision tree classifier. The colors indicate the major class in each node.</p>
        <media xlink:href="medinform_v8i7e15965_app1.png" xlink:title="PNG File, 2685 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">APRC</term>
          <def>
            <p>area under the precision-recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LONS</term>
          <def>
            <p>late-onset neonatal sepsis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">MIMIC-III</term>
          <def>
            <p>Medical Information Mart for Intensive Care III</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NICU</term>
          <def>
            <p>neonatal intensive care unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SMOTE</term>
          <def>
            <p>synthetic minority oversampling technique</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SMOTEENN</term>
          <def>
            <p>SMOTE + Wilson’s Edited Nearest Neighbor Rule</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by a grant of the Korea Health Technology R&#38;D Project through the Korea Health Industry Development Institute (KHIDI), which is funded by the Ministry of Health &#38; Welfare, Republic of Korea (grant number: HI18C0022).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hornik</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fort</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Watt</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Benjamin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Manzoni</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jacqz-Aigrain</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kaguelidou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen-Wolkowiez</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Early and late onset sepsis in very-low-birth-weight infants from a large group of neonatal intensive care units</article-title>
          <source>Early Human Development</source>
          <year>2012</year>
          <month>05</month>
          <volume>88</volume>
          <fpage>S69</fpage>
          <lpage>S74</lpage>
          <pub-id pub-id-type="doi">10.1016/s0378-3782(12)70019-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stoll</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Korones</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Shankaran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tyson</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Fanaroff</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Lemons</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Donovan</surname>
              <given-names>EF</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Stevenson</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Ehrenkranz</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Papile</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Verter</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>LL</given-names>
            </name>
          </person-group>
          <article-title>Late-onset sepsis in very low birth weight neonates: a report from the National Institute of Child Health and Human Development Neonatal Research Network</article-title>
          <source>J Pediatr</source>
          <year>1996</year>
          <month>07</month>
          <volume>129</volume>
          <issue>1</issue>
          <fpage>63</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1016/s0022-3476(96)70191-9</pub-id>
          <pub-id pub-id-type="medline">8757564</pub-id>
          <pub-id pub-id-type="pii">S0022-3476(96)70191-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fanaroff</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Korones</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Verter</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Poland</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Tyson</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Philips</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lucey</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Catz</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Shankaran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Incidence, presenting features, risk factors and significance of late onset septicemia in very low birth weight infants. The National Institute of Child Health and Human Development Neonatal Research Network</article-title>
          <source>Pediatr Infect Dis J</source>
          <year>1998</year>
          <month>07</month>
          <volume>17</volume>
          <issue>7</issue>
          <fpage>593</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1097/00006454-199807000-00004</pub-id>
          <pub-id pub-id-type="medline">9686724</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bekhof</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Reitsma</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Kok</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Van Straaten</surname>
              <given-names>IHLM</given-names>
            </name>
          </person-group>
          <article-title>Clinical signs to identify late-onset sepsis in preterm infants</article-title>
          <source>Eur J Pediatr</source>
          <year>2013</year>
          <month>04</month>
          <volume>172</volume>
          <issue>4</issue>
          <fpage>501</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1007/s00431-012-1910-6</pub-id>
          <pub-id pub-id-type="medline">23271492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Borghesi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stronati</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Strategies for the prevention of hospital-acquired infections in the neonatal intensive care unit</article-title>
          <source>J Hosp Infect</source>
          <year>2008</year>
          <month>04</month>
          <volume>68</volume>
          <issue>4</issue>
          <fpage>293</fpage>
          <lpage>300</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jhin.2008.01.011</pub-id>
          <pub-id pub-id-type="medline">18329134</pub-id>
          <pub-id pub-id-type="pii">S0195-6701(08)00031-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sivanandan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Soraisham</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Swarnam</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Choice and duration of antimicrobial therapy for neonatal sepsis and meningitis</article-title>
          <source>Int J Pediatr</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>712150</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2011/712150"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2011/712150</pub-id>
          <pub-id pub-id-type="medline">22164179</pub-id>
          <pub-id pub-id-type="pmcid">PMC3228399</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zech</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Badgeley</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Titano</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Oermann</surname>
              <given-names>EK</given-names>
            </name>
          </person-group>
          <article-title>Variable generalization performance of a deep learning model to detect pneumonia in chest radiographs: A cross-sectional study</article-title>
          <source>PLoS Med</source>
          <year>2018</year>
          <month>11</month>
          <volume>15</volume>
          <issue>11</issue>
          <fpage>e1002683</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pmed.1002683"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1002683</pub-id>
          <pub-id pub-id-type="medline">30399157</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-18-01277</pub-id>
          <pub-id pub-id-type="pmcid">PMC6219764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Faes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>AU</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bruynseels</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mahendiran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Moraes</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shamdas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ledsam</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Schmid</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Balaskas</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bachmann</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Denniston</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>A comparison of deep learning performance against health-care professionals in detecting diseases from medical imaging: a systematic review and meta-analysis</article-title>
          <source>The Lancet Digital Health</source>
          <year>2019</year>
          <month>10</month>
          <volume>1</volume>
          <issue>6</issue>
          <fpage>e271</fpage>
          <lpage>e297</lpage>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(19)30123-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Malak</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Zeraati</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nayeri</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Safdari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Shahraki</surname>
              <given-names>AD</given-names>
            </name>
          </person-group>
          <article-title>Neonatal intensive care decision support systems using artificial intelligence techniques: a systematic review</article-title>
          <source>Artif Intell Rev</source>
          <year>2018</year>
          <month>05</month>
          <day>22</day>
          <volume>52</volume>
          <issue>4</issue>
          <fpage>2685</fpage>
          <lpage>2704</lpage>
          <pub-id pub-id-type="doi">10.1007/s10462-018-9635-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>O'Shea</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Bissonette</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Harrell</surname>
              <given-names>FE</given-names>
            </name>
            <name name-style="western">
              <surname>Lake</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Moorman</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Abnormal Heart Rate Characteristics Preceding Neonatal Sepsis and Sepsis-Like Illness</article-title>
          <source>Pediatr Res</source>
          <year>2003</year>
          <month>06</month>
          <volume>53</volume>
          <issue>6</issue>
          <fpage>920</fpage>
          <lpage>926</lpage>
          <pub-id pub-id-type="doi">10.1203/01.pdr.0000064904.05313.d2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Moorman</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Toward the early diagnosis of neonatal sepsis and sepsis-like illness using novel heart rate analysis</article-title>
          <source>Pediatrics</source>
          <year>2001</year>
          <month>01</month>
          <volume>107</volume>
          <issue>1</issue>
          <fpage>97</fpage>
          <lpage>104</lpage>
          <pub-id pub-id-type="doi">10.1542/peds.107.1.97</pub-id>
          <pub-id pub-id-type="medline">11134441</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stanculescu</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>CKI</given-names>
            </name>
            <name name-style="western">
              <surname>Freer</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Autoregressive hidden Markov models for the early detection of neonatal sepsis</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2014</year>
          <month>09</month>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>1560</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2013.2294692</pub-id>
          <pub-id pub-id-type="medline">25192568</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stanculescu</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Freer</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A Hierarchical Switching Linear Dynamical System Applied to the Detection of Sepsis in Neonatal Condition Monitoring</article-title>
          <year>2014</year>
          <month>06</month>
          <day>24</day>
          <conf-name>Proceedings of the Thirtieth Conference on Uncertainty in Artificial Intelligence</conf-name>
          <conf-date>23 July 2014</conf-date>
          <conf-loc>Quebec City, Quebec, Canada</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ozdas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aliferis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Varol</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Carnevale</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Romano-Keeler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nian</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Weitkamp</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Medical decision support using machine learning for early detection of late-onset neonatal sepsis</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>326</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24043317"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001854</pub-id>
          <pub-id pub-id-type="medline">24043317</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001854</pub-id>
          <pub-id pub-id-type="pmcid">PMC3932458</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sullivan</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Grice</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Lake</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Moorman</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Fairchild</surname>
              <given-names>KD</given-names>
            </name>
          </person-group>
          <article-title>Infection and other clinical correlates of abnormal heart rate characteristics in preterm infants</article-title>
          <source>J Pediatr</source>
          <year>2014</year>
          <month>04</month>
          <volume>164</volume>
          <issue>4</issue>
          <fpage>775</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24412138"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jpeds.2013.11.038</pub-id>
          <pub-id pub-id-type="medline">24412138</pub-id>
          <pub-id pub-id-type="pii">S0022-3476(13)01473-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC3962693</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <month>05</month>
          <day>02</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
          <pub-id pub-id-type="medline">22549152</pub-id>
          <pub-id pub-id-type="pii">nrg3208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
          </person-group>
          <article-title>Prediction Modeling Using EHR Data</article-title>
          <source>Medical Care</source>
          <year>2010</year>
          <volume>48</volume>
          <fpage>S106</fpage>
          <lpage>S113</lpage>
          <pub-id pub-id-type="doi">10.1097/mlr.0b013e3181de9e17</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>Riccardo</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Fei</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Shuang</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Xiaoqian</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>Joel T</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for healthcare: review, opportunities and challenges</article-title>
          <source>Brief Bioinform</source>
          <year>2018</year>
          <month>11</month>
          <day>27</day>
          <volume>19</volume>
          <issue>6</issue>
          <fpage>1236</fpage>
          <lpage>1246</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28481991"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbx044</pub-id>
          <pub-id pub-id-type="medline">28481991</pub-id>
          <pub-id pub-id-type="pii">3800524</pub-id>
          <pub-id pub-id-type="pmcid">PMC6455466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>DN</given-names>
            </name>
          </person-group>
          <article-title>Addressing the Class Imbalance Problem in Medical Datasets</article-title>
          <source>Int J Mach Learn Comput</source>
          <year>2013</year>
          <fpage>224</fpage>
          <lpage>228</lpage>
          <pub-id pub-id-type="doi">10.7763/ijmlc.2013.v3.307</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Segura-Bedmar</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Colón-Ruíz</surname>
              <given-names>Cristobal</given-names>
            </name>
            <name name-style="western">
              <surname>Tejedor-Alonso</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moro-Moro</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Predicting of anaphylaxis in big data EMR by exploring machine learning approaches</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>11</month>
          <volume>87</volume>
          <fpage>50</fpage>
          <lpage>59</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30187-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.09.012</pub-id>
          <pub-id pub-id-type="medline">30266231</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30187-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mazurowski</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Habas</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Zurada</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Baker</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Tourassi</surname>
              <given-names>GD</given-names>
            </name>
          </person-group>
          <article-title>Training neural network classifiers for medical decision making: the effects of imbalanced datasets on classification performance</article-title>
          <source>Neural Netw</source>
          <year>2008</year>
          <volume>21</volume>
          <issue>2-3</issue>
          <fpage>427</fpage>
          <lpage>436</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18272329"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.neunet.2007.12.031</pub-id>
          <pub-id pub-id-type="medline">18272329</pub-id>
          <pub-id pub-id-type="pii">S0893-6080(07)00240-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC2346433</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ngiam</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ooi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yip</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Resolving the Bias in Electronic Medical Records</article-title>
          <year>2017</year>
          <conf-name>Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 2017</conf-date>
          <conf-loc>Halifax, NS, Canada</conf-loc>
          <publisher-loc>New York, NY, USA</publisher-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>2171</fpage>
          <lpage>2180</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3097983.3098149"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3097983.3098149</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AEW</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <month>05</month>
          <day>24</day>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27219127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guerti</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Devos</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ieven</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Mahieu</surname>
              <given-names>LM</given-names>
            </name>
          </person-group>
          <article-title>Time to positivity of neonatal blood cultures: fast and furious?</article-title>
          <source>J Med Microbiol</source>
          <year>2011</year>
          <month>04</month>
          <volume>60</volume>
          <issue>Pt 4</issue>
          <fpage>446</fpage>
          <lpage>453</lpage>
          <pub-id pub-id-type="doi">10.1099/jmm.0.020651-0</pub-id>
          <pub-id pub-id-type="medline">21163823</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zea-Vera</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ochoa</surname>
              <given-names>TJ</given-names>
            </name>
          </person-group>
          <article-title>Challenges in the diagnosis and management of neonatal sepsis</article-title>
          <source>J Trop Pediatr</source>
          <year>2015</year>
          <month>02</month>
          <volume>61</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25604489"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/tropej/fmu079</pub-id>
          <pub-id pub-id-type="medline">25604489</pub-id>
          <pub-id pub-id-type="pii">fmu079</pub-id>
          <pub-id pub-id-type="pmcid">PMC4375388</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>WP</given-names>
            </name>
          </person-group>
          <article-title>SMOTE: Synthetic Minority Over-sampling Technique</article-title>
          <source>J Artif Intell Res</source>
          <year>2002</year>
          <month>06</month>
          <day>01</day>
          <volume>16</volume>
          <fpage>321</fpage>
          <lpage>357</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>ADASYN: Adaptive synthetic sampling approach for imbalanced learning</article-title>
          <year>2008</year>
          <month>06</month>
          <conf-name>2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)</conf-name>
          <conf-date>1-8 June 2008</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <publisher-name>IEEE</publisher-name>
          <fpage>1322</fpage>
          <lpage>1328</lpage>
          <pub-id pub-id-type="doi">10.1109/IJCNN.2008.4633969</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mani</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>KNN Approach to Unbalanced Data Distributions: A Case Study Involving Information Extraction</article-title>
          <year>2003</year>
          <month>08</month>
          <day>21</day>
          <conf-name>Proceedings of the International Conference on Machine Learning (ICML 2003), Workshop on Learning from Imbalanced Data Sets</conf-name>
          <conf-date>21 August 2003</conf-date>
          <conf-loc>Washington DC</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tomek</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>An Experiment with the Edited Nearest-Neighbor Rule</article-title>
          <source>IEEE Trans. Syst., Man, Cybern</source>
          <year>1976</year>
          <month>06</month>
          <volume>SMC-6</volume>
          <issue>6</issue>
          <fpage>448</fpage>
          <lpage>452</lpage>
          <pub-id pub-id-type="doi">10.1109/tsmc.1976.4309523</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Giraud-Carrier</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>An instance level analysis of data complexity</article-title>
          <source>Mach Learn</source>
          <year>2013</year>
          <month>11</month>
          <day>05</day>
          <volume>95</volume>
          <issue>2</issue>
          <fpage>225</fpage>
          <lpage>256</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30311153"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10994-013-5422-z</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10620-018-5316-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC6436636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>GEAPA</given-names>
            </name>
            <name name-style="western">
              <surname>Prati</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Monard</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>A study of the behavior of several methods for balancing machine learning training data</article-title>
          <source>SIGKDD Explor. Newsl</source>
          <year>2004</year>
          <month>06</month>
          <day>01</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>20</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.1145/1007730.1007735</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>GEAPA</given-names>
            </name>
            <name name-style="western">
              <surname>Bazzan</surname>
              <given-names>ALC</given-names>
            </name>
            <name name-style="western">
              <surname>Monard</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>Balancing Training Data for Automated Annotation of Keywords: a Case Study</article-title>
          <source>Brazilian Workshop on Bioinformatics</source>
          <year>2003</year>
          <conf-name>Proceedings of the Second Brazilian Workshop on Bioinformatics</conf-name>
          <conf-date>December 3-5, 2003</conf-date>
          <conf-loc>Macaé, Rio de Janeiro, Brazil</conf-loc>
          <fpage>10</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/c1a9/5197e15fa99f55cd0cb2ee14d2f02699a919.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phyu</surname>
              <given-names>TZ</given-names>
            </name>
            <name name-style="western">
              <surname>Oo</surname>
              <given-names>NN</given-names>
            </name>
          </person-group>
          <article-title>Performance Comparison of Feature Selection Methods</article-title>
          <source>MATEC Web of Conferences</source>
          <year>2016</year>
          <month>02</month>
          <day>17</day>
          <volume>42</volume>
          <fpage>06002</fpage>
          <pub-id pub-id-type="doi">10.1051/matecconf/20164206002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Stochastic gradient boosting</article-title>
          <source>Computational Statistics &#38; Data Analysis</source>
          <year>2002</year>
          <month>02</month>
          <volume>38</volume>
          <issue>4</issue>
          <fpage>367</fpage>
          <lpage>378</lpage>
          <pub-id pub-id-type="doi">10.1016/s0167-9473(01)00065-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carrara</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Baselli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrario</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Mortality Prediction Model of Septic Shock Patients Based on Routinely Recorded Data</article-title>
          <source>Comput Math Methods Med</source>
          <year>2015</year>
          <volume>2015</volume>
          <fpage>761435</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2015/761435"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2015/761435</pub-id>
          <pub-id pub-id-type="medline">26557154</pub-id>
          <pub-id pub-id-type="pmcid">PMC4628694</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
