<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i3e30587</article-id>
      <article-id pub-id-type="pmid">35289753</article-id>
      <article-id pub-id-type="doi">10.2196/30587</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Selective Prediction With Long Short-term Memory Using Unit-Wise Batch Standardization for Time Series Health Data Sets: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>Lichin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Oladele</surname>
            <given-names>Daniel</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Nam</surname>
            <given-names>Borum</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8267-9499</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Joo Young</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5221-1123</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>In Young</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9580-7074</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Cho</surname>
            <given-names>Baek Hwan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Medical AI Research Center</institution>
            <institution>Samsung Medical Center</institution>
            <addr-line>81, Irwon-ro, Gangnam-gu</addr-line>
            <addr-line>Seoul, 06351</addr-line>
            <country>Republic of Korea</country>
            <phone>82 234100885</phone>
            <email>baekhwan.cho@samsung.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7722-5660</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Electronic Engineering</institution>
        <institution>Hanyang University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Biomedical Engineering</institution>
        <institution>Hanyang University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Medical AI Research Center</institution>
        <institution>Samsung Medical Center</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Baek Hwan Cho <email>baekhwan.cho@samsung.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>3</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>3</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>3</issue>
      <elocation-id>e30587</elocation-id>
      <history>
        <date date-type="received">
          <day>21</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>9</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>16</day>
          <month>11</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>2</day>
          <month>1</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Borum Nam, Joo Young Kim, In Young Kim, Baek Hwan Cho. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 15.03.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/3/e30587" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In any health care system, both the classification of data and the confidence level of such classifications are important. Therefore, a selective prediction model is required to classify time series health data according to confidence levels of prediction.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop a method using long short-term memory (LSTM) models with a reject option for time series health data classification.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>An existing selective prediction method was adopted to implement an option for rejecting a classification output in LSTM models. However, a conventional selection function approach to LSTM does not achieve acceptable performance during learning stages. To tackle this problem, we proposed a unit-wise batch standardization that attempts to normalize each hidden unit in LSTM to apply the structural characteristics of LSTM models that concern the selection function.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The ability of our method to approximate the target confidence level was compared by coverage violations for 2 time series of health data sets consisting of human activity and arrhythmia. For both data sets, our approach yielded lower average coverage violations (0.98% and 1.79% for each data set) than those of the conventional approach. In addition, the classification performance when using the reject option was compared with that of other normalization methods. Our method demonstrated superior performance for selective risk (12.63% and 17.82% for each data set), false-positive rates (2.09% and 5.8% for each data set), and false-negative rates (10.58% and 17.24% for each data set).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our normalization approach can help make selective predictions for time series health data. We expect this technique to enhance the confidence of users in classification systems and improve collaborative efforts between humans and artificial intelligence in the medical field through the use of classification that considers confidence.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>recurrent neural networks</kwd>
        <kwd>biomedical informatics</kwd>
        <kwd>computer-aided analysis</kwd>
        <kwd>mobile phone</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>High-performance networks have been used to enhance the quality and convenience of human life since the development of deep learning techniques. Deep learning networks are used in education, aviation, process management, entertainment, agriculture, and robotics. Artificial intelligence (AI) has made significant contributions to a variety of medical applications [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. However, in a clinical setting, the output from AI as an accurate prediction is often insufficient and requires its interpretation for further decisions [<xref ref-type="bibr" rid="ref4">4</xref>]. As medical AI systems can support efficient and accurate decisions, it is important not only to increase the accuracy of classification in deep learning networks but also to reduce errors, particularly those that can be fatal [<xref ref-type="bibr" rid="ref5">5</xref>]. In addition, health care data tend to be complex, and neural networks have proven problematic in accurately recognizing patterns in this complexity [<xref ref-type="bibr" rid="ref6">6</xref>]. The uncertainty of prediction measures the reliability of a prediction and must be considered in fields that require prudent decisions, such as medicine or autonomous driving [<xref ref-type="bibr" rid="ref7">7</xref>]. Accordingly, in fields where minor errors can cause significant problems, applying a prediction model that can reject predictions when the confidence level is not high enough is helpful. To develop such a deep neural network, a selective prediction [<xref ref-type="bibr" rid="ref8">8</xref>] method can be applied to use the confidence level in both training and test sessions.</p>
        <p>Various biosignal sensors have been developed for human health care applications, and many algorithms have been developed to analyze the data produced by these sensors. Deep learning technologies have performed well when applied to data obtained from health care or medical sensors [<xref ref-type="bibr" rid="ref9">9</xref>]. Classification models based on a deep neural network or convolutional neural network (CNN) have been used to classify health and medical data. In addition, biosignals and time series data from humans are used in diverse health care systems [<xref ref-type="bibr" rid="ref10">10</xref>]. In various studies, recurrent neural network (RNN) models have been used to classify health and medical data, especially time series data. Among such models, RNNs have contributed significantly to the classification of time series data. Many studies have used RNN models to classify electronic health records obtained from clinical measurements [<xref ref-type="bibr" rid="ref11">11</xref>], predict diseases using patient diagnostic histories [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>], conduct health status analyses using biosignals [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref18">18</xref>], and classify health information from mobile and wearable sensors [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. Previous studies have applied prediction confidence to classify image data, and prediction confidence can be considered for classifying time series health data using RNN models. However, little research has focused on how to use prediction confidence for time series health data.</p>
        <p>Considering the specificity of time series health data, a model that can produce results according to the predicted confidence level and uses prediction confidence has the advantage of reducing fatal errors.</p>
        <p>The selective prediction model can learn from certain samples that are sufficiently confident in their predictions. This means that such a model can ignore predictions when they are uncertain in training. In addition, the selective prediction model provides a confidence level for each test sample in the inference stage, which can be used as a reference score in a medical situation. In early studies on selective prediction, neural network models with a reject option were used to obtain a specific confidence score from a trained model and as a model threshold to validate performance [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. However, these methods calculate the prediction probability to select samples for training based on a threshold called the prediction confidence score.</p>
        <p>Recently, research using the selective prediction model mainly consists of 2 parts. The first is to extract an appropriate prediction confidence score and the second is to make good use of the extracted prediction confidence score for the deep learning model. For extracting the prediction confidence score, methods have been designed in many studies. For example, the softmax response and Monte Carlo (MC) dropout methods use a confidence score from neural networks [<xref ref-type="bibr" rid="ref26">26</xref>]. The softmax response method extracts a confidence score using maximum softmax values from neural networks, as described in the above methods, whereas an MC dropout estimates a confidence score using statistical approaches. However, MC dropout requires a high computational cost to optimize the problem quickly. Although Bayesian methods [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>] can produce prediction confidence scores of RNNs [<xref ref-type="bibr" rid="ref30">30</xref>], they are applicable only for natural language processing, which uses <italic>many-to-many</italic> RNNs with multiple sequence inputs and outputs. However, the predictive models in health care are usually <italic>many-to-one</italic> types that predict class using a health information time series as input, and it is helpful for medical staff to train a <italic>many-to-one</italic> predictive model for time series data that has a selective prediction ability. For a model using the prediction confidence score, a selective prediction model that learned both prediction and selection was developed [<xref ref-type="bibr" rid="ref31">31</xref>]. On the basis of this method, SelectiveNet [<xref ref-type="bibr" rid="ref32">32</xref>] has demonstrated potential possibilities for various applications, with the advantage of learning the selection and prediction simultaneously. 
However, the structure of the selective prediction model using long short-term memory (LSTM) has not been validated in previous studies. Thus, a well-designed selective prediction model for time series data is required.</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>In this study, a selective prediction model using LSTM [<xref ref-type="bibr" rid="ref33">33</xref>] was implemented to classify time series health data. In particular, we considered a method that incorporates a reject option to control and measure prediction confidence for <italic>many-to-one</italic> classification tasks. As the selection function uses the output of the prediction model as an input, a suitable selection function structure must be devised. Therefore, methods to normalize the selection function were compared to achieve a structure suitable for classifying time series data with LSTM. To validate the LSTM selective prediction performance, we used coverage violations and selective risks for each data set. As high false-positive and false-negative rates can be critical factors in diagnoses, we also present the false-positive and false-negative rates of the LSTM selective prediction model. In summary, the goal of this study is to develop a selective prediction model for health data time series. The contributions of this study are (1) applying the latest selective prediction method with superior performance to classify time series health data using LSTM and (2) presenting the structure of the selection function in the selective prediction model (especially the normalization method) for time series selective prediction.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Selective Prediction</title>
        <p>We examined the possibility of RNN models with a reject option using SelectiveNet [<xref ref-type="bibr" rid="ref32">32</xref>], which has superior performance compared with existing selective prediction models. The overall structure of the model was based on the SelectiveNet [<xref ref-type="bibr" rid="ref32">32</xref>] model with an LSTM; it is divided into selective and auxiliary predictions, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The selective prediction is divided again into two steps: prediction and selection. Prediction involves the results of the LSTM model and the selection part extracts the predicted confidence level of the LSTM model. In this study, we propose unit-wise batch standardization (UBS) as part of the selection function. Selective prediction is performed using both the prediction and selection function results. An auxiliary prediction step using the LSTM prediction result to derive the final result with the selective prediction result was added to enhance prediction performance. As selective prediction is a prediction model using a deep learning model structure, it is optimized by a loss function. The entire model is trained by optimizing the selective prediction and auxiliary prediction steps simultaneously. Further details are provided in the <italic>Optimization</italic> section. LSTM was used for the RNN model for time series data classification.</p>
        <p>A selective model was used to implement classification models with the reject option [<xref ref-type="bibr" rid="ref34">34</xref>]. The selective model (<italic>f, g</italic>) consists of pairing a prediction function <italic>f</italic> and a selection function <italic>g</italic>: <italic>X</italic>→{<italic>Y</italic>&#124;0≤<italic>Y</italic>≤1} (<italic>X</italic> is a set of inputs and <italic>Y</italic> is a set of outputs). When the data set is given as <inline-graphic xlink:href="medinform_v10i3e30587_fig4.png" xlink:type="simple" mimetype="image"/> for supervised learning of the classification model, the empirical risk of prediction function <italic>f</italic> becomes <inline-graphic xlink:href="medinform_v10i3e30587_fig5.png" xlink:type="simple" mimetype="image"/>. When <italic>τ</italic> is a threshold, <italic>g</italic> acts as a qualifier of <italic>f</italic> and can be expressed as follows:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i3e30587_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>Selective models can be controlled by coverage and risk values. When <italic>E<sub>p</sub></italic> is the expected probability, and ℓ is the loss function, we can define the coverage and risk as follows:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i3e30587_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>where <italic>g</italic>(<italic>x</italic>) is the prediction confidence score, <italic>ϕ</italic>(<italic>g</italic>) is a coverage value that is the expected value of the prediction confidence scores for training samples, which is correlated with the number of selected samples during training. R(<italic>f, g</italic>) is a selective risk that represents the error rate for predicting the selected samples using selective prediction. The corresponding selective risk for a data set <inline-graphic xlink:href="medinform_v10i3e30587_fig8.png" xlink:type="simple" mimetype="image"/> is called the empirical selective risk and is defined as follows:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i3e30587_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>The empirical coverage corresponding to the data set <italic>S<sub>m</sub></italic> is as follows:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i3e30587_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Long short-term memory model structure with a reject option. LSTM: long short-term memory.</p>
          </caption>
          <graphic xlink:href="medinform_v10i3e30587_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Optimization</title>
        <p>An optimization method was used to constrain coverage and reduce the selective risk [<xref ref-type="bibr" rid="ref31">31</xref>]. The selective prediction model was optimized by the loss functions in equations 6, 7, and 8. This loss function simultaneously regulates the prediction and selection steps. Hence, the selective prediction was regulated to lower the error rate, which is the selective risk for the selected samples according to the prediction confidence. In addition, the selection step was optimized to select training samples based on the predefined target coverage so that the selection step would reject predictions below the confidence level. The target coverage is a controlling hyperparameter for the model to learn the amount of data to be selected during training. On the basis of this, we trained the model so that the coverage value was as close to the target coverage as possible. The target coverage <italic>c</italic> is in the range 0&#60;<italic>c</italic>≤1. When the parameter set of the selective model (<italic>f, g</italic>) is Θ, the optimization of the selective model is as follows:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i3e30587_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>The <italic>f<sub>θ</sub></italic> and <italic>g<sub>θ</sub></italic> in the selective prediction were optimized by equation 6. It is necessary to constrain coverage and reduce risk (error) for selective prediction. We used the interior point method for optimization [<xref ref-type="bibr" rid="ref35">35</xref>]. The following unconstrained objective is used to optimize the selective prediction model for a data set <italic>S<sub>m</sub></italic>:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i3e30587_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>c</italic> is the target coverage, and λ is a hyperparameter that controls the coverage constraints. Using equation 6, the selection function <italic>g</italic> is optimized to produce an appropriate prediction confidence score, and the selective prediction is optimized to reduce the selective risk <inline-graphic xlink:href="medinform_v10i3e30587_fig16.png" xlink:type="simple" mimetype="image"/>. The empirical coverage value <inline-graphic xlink:href="medinform_v10i3e30587_fig13.png" xlink:type="simple" mimetype="image"/> is probabilistically calculated using the selection function. The Ψ allows the coverage value <inline-graphic xlink:href="medinform_v10i3e30587_fig13.png" xlink:type="simple" mimetype="image"/> to approximate the target coverage during the training session. The auxiliary classification loss is optimized using the loss function <inline-graphic xlink:href="medinform_v10i3e30587_fig17.png" xlink:type="simple" mimetype="image"/>. Overall, optimization can be defined using a convex combination expressed by the following equations:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i3e30587_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <disp-formula>
          <graphic xlink:href="medinform_v10i3e30587_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where α is another user-controlled parameter for the weights between the selective and auxiliary predictions.</p>
      </sec>
      <sec>
        <title>UBS Procedure</title>
        <p>In this study, a new selection function structure for LSTM models was designed. The basic frame of the selection function structure was based on a CNN-based model from a previous study [<xref ref-type="bibr" rid="ref32">32</xref>] that used batch normalization [<xref ref-type="bibr" rid="ref36">36</xref>] for the selection function. The detailed structure and parameters were determined through a grid search. The output shape of the <italic>many-to-one</italic> structure LSTM is (n_batch, n_hidden_unit), with conventional batch normalization, applying the same mean and variance to all units. However, this method of normalization ignores the features of each hidden unit in the LSTM output. To address this problem, we applied a new UBS that normalizes the batch derived from an original batch normalization [<xref ref-type="bibr" rid="ref36">36</xref>] while preserving the hidden-unit features captured for each training sample. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, UBS uses a fully connected layer that maintains the LSTM output's shape while generating the output and standardizing the batch, as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. When batch normalization is applied to CNNs, normalization factors (mean and variance) are obtained from each input channel [<xref ref-type="bibr" rid="ref37">37</xref>]. However, to preserve hidden units' individual features, we calculated normalization factors obtained from each LSTM's hidden unit.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Detailed structure of the selective prediction step.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="500"/>
            <col width="360"/>
            <thead>
              <tr valign="top">
                <td>Layer</td>
                <td>Input shape</td>
                <td>Output shape</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>LSTM<sup>a</sup></td>
                <td>(n_batch, n_time steps, n_features)</td>
                <td>(n_batch, n_hidden unit)</td>
              </tr>
              <tr valign="top">
                <td>FC1<sup>b,c</sup></td>
                <td>(n_batch, n_hidden unit)</td>
                <td>(n_batch, n_hidden unit)</td>
              </tr>
              <tr valign="top">
                <td>FC2<sup>b,d</sup></td>
                <td>(n_batch, n_hidden unit)</td>
                <td>(n_batch, n_hidden unit)</td>
              </tr>
              <tr valign="top">
                <td>ReLU<sup>b,e</sup></td>
                <td>(n_batch, n_hidden unit)</td>
                <td>(n_batch, n_hidden unit)</td>
              </tr>
              <tr valign="top">
                <td>UBS<sup>b,f</sup></td>
                <td>(n_batch, n_hidden unit)</td>
                <td>(n_batch, n_hidden unit)</td>
              </tr>
              <tr valign="top">
                <td>FC3<sup>g</sup></td>
                <td>(n_batch, n_hidden unit)</td>
                <td>(n_batch, 1)</td>
              </tr>
              <tr valign="top">
                <td>Sigmoid</td>
                <td>(n_batch, 1)</td>
                <td>(n_batch, 1)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>LSTM: long short-term memory.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>The layer retains the input.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>FC1: fully connected layer 1.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>FC2: fully connected layer 2.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>ReLU: rectified linear unit.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>UBS: unit-wise batch standardization.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>FC3: fully connected layer 3.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Algorithm of unit-wise batch standardization. LSTM: long short-term memory; ReLU: rectified linear unit.</p>
          </caption>
          <graphic xlink:href="medinform_v10i3e30587_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Performance Evaluation</title>
        <p>In a health care system, a misdiagnosis involving a type 2 error may imply serious repercussions, and incorrect judgment involving a type 1 error may increase user fatigue. Therefore, we verified the performance of the algorithm by checking false-positive and false-negative rates. The false-positive rate (also known as type 1 error, fall-out, or false-alarm ratio) was calculated as the ratio between the number of negative events incorrectly identified as positive and the total number of actual negative events. The false-negative rate (type 2 error) was calculated as the number of samples misclassified as negative out of the total number of positive events.</p>
      </sec>
      <sec>
        <title>Experiment</title>
        <sec>
          <title>Overview</title>
          <sec>
            <title>Data Sets</title>
            <p>This study was reviewed and approved by the institutional review board (#HYUIRB-202111-003) of Hanyang University, and the requirement for informed consent was waived. A widely used public database was employed to verify the applicability of the selective prediction model to time series health care data. Considering that the purpose of selective prediction is to reject uncertain predictions, we selected two data sets containing classes that can be misclassified [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]: the <italic>human activity recognition using smartphones</italic> and the <italic>Massachusetts Institute of Technology-Beth Israel Hospital</italic> (MIT-BIH) data sets. Detailed descriptions of the data sets have been provided below.</p>
          </sec>
          <sec>
            <title>Human Activity Recognition Using Smartphones Data Set</title>
            <p>This data set consists of human gait signals monitored by an accelerometer and gyroscope with 6 different activity classes [<xref ref-type="bibr" rid="ref43">43</xref>]. The signal was measured by attaching Samsung Galaxy S2 smartphones with embedded inertial sensors to the waists of 30 subjects aged 19 to 48 years. Each subject performed six activities (standing, sitting, laying, walking, walking upstairs, and walking downstairs) at least two times for 12 to 15 seconds. The 3-axial linear acceleration and angular velocity were measured at 50 Hz using an embedded accelerometer and gyroscope. The experiments were video-recorded to label the data manually. The signals were preprocessed using a median filter and a third-order low-pass Butterworth filter with a 20-Hz cutoff frequency and then sampled in sliding windows of 2.56 seconds with 50% overlap (128 readings/window). A total of 10,299 data points were recorded. The training data were randomly selected from 70% of the data set, and the remaining data set was used for the test. The x, y, and z components of the body accelerometer, body gyroscope, and total (gravitational and body) accelerometers were treated as 9 input features. Each sample contained 128 sequences.</p>
          </sec>
          <sec>
            <title>MIT-BIH Arrhythmia Data Set</title>
            <p>This data set contains 48 half-hour excerpts of two-channel ambulatory electrocardiogram (ECG) recordings from 47 subjects [<xref ref-type="bibr" rid="ref44">44</xref>]. The recordings were digitized at 360 samples per second per channel with 11-bit resolution over a 10-mV range and annotated independently by 2 or more cardiologists. The data set is publicly available in the PhysioNet [<xref ref-type="bibr" rid="ref45">45</xref>] database. All protected health information was removed and deidentified using record numbers. A method described in a previous study was used for preprocessing data [<xref ref-type="bibr" rid="ref46">46</xref>]. First, ECG signals were divided into 10-second intervals. Subsequently, the signal was normalized between 0 and 1. Where the median of the R-R time interval in the ECG signal was T, the time from the R peak to 1.2 T was used as 1 segment. Because the length of the segment changes every 10 seconds, the length of the entire data set is zero-padded based on the longest time. The data set consisted of 109,446 data points with a sampling frequency of 125 Hz. Each data set contained 187 sequences grouped into five classes: N (normal beat), S (supraventricular premature beat), V (premature ventricular contraction), F (fusion of ventricular and normal beats), and Q (unclassifiable beat). Unclassifiable data were not included in this study. As the data for each class were highly imbalanced, 800 data samples were randomly extracted from each class [<xref ref-type="bibr" rid="ref46">46</xref>]. The data set was sampled for every run, and the result was expressed as the average of the results. The data set was then randomly divided into sets: 80% for training and 20% for testing.</p>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Model Architecture and Parameters Setting</title>
        <sec>
          <title>Overview</title>
          <p>In this study, a selective prediction model was developed using LSTM. Deep learning models such as LSTM are considered effective for extracting meaningful features from raw data. No feature extractor was used in this study because a deep learning model is suitable for use with raw data. The prediction model architecture was determined and optimized based on previous studies, and hyperparameters were optimized using an extensive grid search [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>]. The details for each data set are described below.</p>
        </sec>
        <sec>
          <title>Human Activity Recognition Using Smartphones Data Set</title>
          <p>The LSTM model for the human activity recognition using smartphones data set had a single layer with 2 cells and 32 hidden units. For parameter setting, the learning rate was 0.0005, and the L2 regularization was set at a lambda of 0.00005. The minibatch size was 919, and the training epoch was 500. The optimal α and λ were 0.6 and 200, respectively.</p>
        </sec>
        <sec>
          <title>MIT-BIH Arrhythmia Data Set</title>
          <p>The LSTM model for the MIT-BIH arrhythmia data set had a single layer with 2 cells and 48 hidden units, a learning rate of 0.0001, a minibatch size of 640, and a training epoch of 2000. The optimal α was 0.2, and the optimal λ was 4.</p>
        </sec>
      </sec>
      <sec>
        <title>Comparison Method</title>
        <p>To prove that the UBS is effective for developing a proper selection function in an LSTM model with a reject option, we compared it with conventional batch normalization and a model without normalization. The false-positive and false-negative rates were also calculated, and a standard LSTM model without a selection function was used as the baseline.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>LSTM Performance for Prediction</title>
        <p>The baseline models should be optimized for LSTM models without a selection function for each data set. Therefore, we validated the LSTM model prediction performance without any selection. The test accuracies of the LSTM models optimized without a selection step for the human activity recognition using smartphones data set and the MIT-BIH arrhythmia data set were 92.35% and 97.23% for each data set. The precision of the model was 91.72% and the recall was 91.54% for the human activity recognition using smartphones data set. For the MIT-BIH arrhythmia data set, the precision of the model was 87.13% and the recall was 78.64%. The F1-scores for each data set were 91.63% and 82.67%, respectively.</p>
      </sec>
      <sec>
        <title>Coverage Violation</title>
        <p>After setting the target coverage, the empirical coverage of the test set was calculated for each normalization method. The target coverage rates were obtained from a previous study [<xref ref-type="bibr" rid="ref32">32</xref>]. As the target coverage is the target threshold, it should be set to a sufficiently reliable value. Therefore, the target coverages were set at 0.85, 0.90, and 0.95. The difference between the target coverage and the actual coverage value is called <italic>coverage violation</italic>, which estimates the extent to which the model can learn to select the samples as instructed by the target coverage hyperparameter. The experimental results for each data set are listed in <xref ref-type="table" rid="table2">Table 2</xref>. The coverage value was averaged for 5 different runs. As shown in <xref ref-type="table" rid="table2">Table 2</xref>, the empirical coverage with UBS produced superior results as they converged on the target coverage, whereas other normalization approaches showed relatively poor results.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Empirical coverage of the human activity recognition (HAR) using smartphones and the Massachusetts Institute of Technology-Beth Israel Hospital (MIT-BIH) arrhythmia data sets by different normalization methods. Target coverage was set before training.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="170"/>
            <col width="90"/>
            <col width="90"/>
            <col width="280"/>
            <col width="0"/>
            <col width="100"/>
            <col width="100"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td>
                  Target coverage
                </td>
                <td colspan="4">HAR using smartphones data set</td>
                <td colspan="3">MIT-BIH arrhythmia data set</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Normalization method of selective prediction</td>
                <td colspan="3">Normalization method of selective prediction</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>UBS<sup>a</sup></td>
                <td>BN<sup>b</sup></td>
                <td>Without normalization<sup>c</sup></td>
                <td colspan="2">UBS</td>
                <td>BN</td>
                <td>Without normalization</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0.95, mean (SD)</td>
                <td>0.9660 (0.0029)</td>
                <td>0.9996 (0.0001)</td>
                <td>0.9986 (0.0002)</td>
                <td colspan="2">0.9564 (0.0019)</td>
                <td>0.9680 (0.0067)</td>
                <td>1.0000 (0)</td>
              </tr>
              <tr valign="top">
                <td>0.90, mean (SD)</td>
                <td>0.9053 (0.0035)</td>
                <td>0.9980 (0.0001)</td>
                <td>0.9984 (0.0001)</td>
                <td colspan="2">0.9084 (0.0055)</td>
                <td>0.9998 (0.0001)</td>
                <td>1.0000 (0)</td>
              </tr>
              <tr valign="top">
                <td>0.85, mean (SD)</td>
                <td>0.8582 (0.0007)</td>
                <td>0.9237 (0.0026)</td>
                <td>0.9986 (0.0002)</td>
                <td colspan="2">0.8888 (0.0016)</td>
                <td>0.9518 (0.0001)</td>
                <td>1.0000 (0)</td>
              </tr>
              <tr valign="top">
                <td>Average violation, %</td>
                <td>0.98</td>
                <td>7.38</td>
                <td>9.85</td>
                <td colspan="2">1.79</td>
                <td>7.32</td>
                <td>10.00</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>UBS: unit-wise batch standardization.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>BN: batch normalization (a normalization method using the mean and variance obtained from the input batch).</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup><italic>Without normalization</italic> means that there was no normalization in the selection function structure.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Selective Risk (Error Rate)</title>
        <p>The selective risks for each normalization method are presented in <xref ref-type="table" rid="table3">Table 3</xref>. The selective risk value was averaged from 5 different runs. In the selective prediction model with LSTM, the selective risk increased with coverage. UBS normalization achieved relatively superior performance with various target coverages compared with conventional batch normalization. If normalization was not applied, the risk varied widely.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Selective risk of the human activity recognition (HAR) using smartphones and the Massachusetts Institute of Technology-Beth Israel Hospital (MIT-BIH) arrhythmia data sets by different normalization methods.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="130"/>
            <col width="100"/>
            <col width="100"/>
            <col width="300"/>
            <col width="0"/>
            <col width="110"/>
            <col width="110"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>
                 Target coverage
                </td>
                <td colspan="4">HAR using smartphones data set</td>
                <td colspan="3">MIT-BIH arrhythmia data set</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Normalization method of selective prediction</td>
                <td colspan="3">Normalization method of selective prediction</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>UBS<sup>a</sup></td>
                <td>BN<sup>b</sup></td>
                <td>Without normalization<sup>c</sup></td>
                <td colspan="2">UBS</td>
                <td>BN</td>
                <td>Without normalization</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0.95, mean (SD)</td>
                <td>0.1423 (0.0041)</td>
                <td>0.1611 (0.0445)</td>
                <td>0.1476 (0.0068)</td>
                <td colspan="2">0.1970 (0.0038)</td>
                <td>0.2175 (0.0108)</td>
                <td>0.2000 (0.4472)</td>
              </tr>
              <tr valign="top">
                <td>0.90, mean (SD)</td>
                <td>0.1232 (0.0042)</td>
                <td>0.1283 (0.0067)</td>
                <td>0.1312 (0.0139)</td>
                <td colspan="2">0.1791 (0.0050)</td>
                <td>0.3200 (0.1095)</td>
                <td>0.2000 (0.4472)</td>
              </tr>
              <tr valign="top">
                <td>0.85, mean (SD)</td>
                <td>0.1136 (0.0060)</td>
                <td>0.1170 (0.0024)</td>
                <td>0.1267 (0.0145)</td>
                <td colspan="2">0.1585 (0.0028)</td>
                <td>0.1967 (0.0064)</td>
                <td>0.2000 (0.4472)</td>
              </tr>
              <tr valign="top">
                <td>Average risk</td>
                <td>0.1264</td>
                <td>0.1355</td>
                <td>0.1352</td>
                <td colspan="2">0.1782</td>
                <td>0.2447</td>
                <td>0.2</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>UBS: unit-wise batch standardization.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>BN: batch normalization (a normalization method using the mean and variance obtained from the input batch).</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup><italic>Without normalization</italic> means that there was no normalization in the selection function structure.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>False-Positive and False-Negative Rates</title>
        <p>As the selective prediction model produced classification results only when it was confident about its own classification, we expected that both false-positive and false-negative rates would decrease. The false-positive and false-negative rates of each data set were calculated from the results of the model that achieved the best performance among 5 different runs (<xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>). The baseline models were well-optimized LSTM models without a selection function for each data set.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>False-positive rates of the human activity recognition (HAR) using smartphones and the Massachusetts Institute of Technology-Beth Israel Hospital (MIT-BIH) arrhythmia data sets by different normalization methods.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="80"/>
            <col width="70"/>
            <col width="170"/>
            <col width="130"/>
            <col width="0"/>
            <col width="70"/>
            <col width="70"/>
            <col width="170"/>
            <col width="0"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>
                  Target coverage
                </td>
                <td colspan="5">HAR using smartphones data set</td>
                <td colspan="5">MIT-BIH arrhythmia data set</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">Normalization method of selective prediction</td>
                <td>General prediction<sup>a</sup></td>
                <td colspan="5">Normalization method of selective prediction</td>
                <td>General prediction</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>UBS<sup>b</sup></td>
                <td>BN<sup>c</sup></td>
                <td>Without normalization<sup>d</sup></td>
                <td>
                  <break/>
                </td>
                <td colspan="2">UBS</td>
                <td>BN</td>
                <td>Without normalization</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0.95, %</td>
                <td>2.04</td>
                <td>2.59</td>
                <td>2.65</td>
                <td>N/A<sup>e</sup></td>
                <td colspan="2">6.34</td>
                <td>7.67</td>
                <td>6.93</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>0.90, %</td>
                <td>2.00</td>
                <td>3.00</td>
                <td>2.63</td>
                <td>N/A</td>
                <td colspan="2">5.39</td>
                <td>6.98</td>
                <td>6.77</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>0.85, %</td>
                <td>2.22</td>
                <td>3.02</td>
                <td>2.63</td>
                <td>N/A</td>
                <td colspan="2">5.66</td>
                <td>7.03</td>
                <td>7.97</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>Average false-positive rate, %</td>
                <td>2.09</td>
                <td>2.87</td>
                <td>2.64</td>
                <td>2.89</td>
                <td colspan="2">5.80</td>
                <td>7.23</td>
                <td>7.22</td>
                <td colspan="2">6.44</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>General prediction is the long short-term memory classification model's false-positive rate without a selection function.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>UBS: unit-wise batch standardization.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>BN: batch normalization (a normalization method using the mean and variance obtained from the input batch).</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup><italic>Without normalization</italic> means that there was no normalization in the selection function structure.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>False-negative rates of the human activity recognition (HAR) using smartphones and the Massachusetts Institute of Technology-Beth Israel Hospital (MIT-BIH) arrhythmia data sets by different normalization methods.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="80"/>
            <col width="80"/>
            <col width="180"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="80"/>
            <col width="80"/>
            <col width="180"/>
            <col width="0"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>
                  Target coverage
                </td>
                <td colspan="6">HAR using smartphones data set</td>
                <td colspan="5">MIT-BIH arrhythmia data set</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Normalization method of selective prediction</td>
                <td colspan="2">General prediction<sup>a</sup></td>
                <td colspan="4">Normalization method of selective prediction</td>
                <td>General prediction</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>UBS<sup>b</sup></td>
                <td>BN<sup>c</sup></td>
                <td>Without normalization<sup>d</sup></td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">UBS</td>
                <td>BN</td>
                <td>Without normalization</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0.95, %</td>
                <td>10.18</td>
                <td>17.17</td>
                <td>12.69</td>
                <td colspan="2">N/A<sup>e</sup></td>
                <td colspan="2">18.82</td>
                <td>23.33</td>
                <td>20.78</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>0.90, %</td>
                <td>10.72</td>
                <td>15.04</td>
                <td>13.05</td>
                <td colspan="2">N/A</td>
                <td colspan="2">16.48</td>
                <td>20.94</td>
                <td>20.31</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>0.85, %</td>
                <td>10.85</td>
                <td>14.46</td>
                <td>12.94</td>
                <td colspan="2">N/A</td>
                <td colspan="2">16.41</td>
                <td>21.44</td>
                <td>23.91</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>Average false-negative rate, %</td>
                <td>10.58</td>
                <td>15.56</td>
                <td>12.89</td>
                <td colspan="2">14.48</td>
                <td colspan="2">17.24</td>
                <td>21.90</td>
                <td>21.67</td>
                <td colspan="2">26.47</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>General prediction is the long short-term memory classification model's false-negative rate without a selection function.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>UBS: unit-wise batch standardization.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>BN: batch normalization (a normalization method using the mean and variance obtained from the input batch).</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup><italic>Without normalization</italic> means that there was no normalization in the selection function structure.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Learned Feature Representation</title>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows the visualization of the features learned from the LSTM models using t-distributed stochastic neighbor embedding [<xref ref-type="bibr" rid="ref49">49</xref>]. <xref rid="figure3" ref-type="fig">Figure 3</xref> (left) depicts the test set sample that was not rejected when the target coverage was set at 0.95. The data set used in the visualization was the test set for the human activity recognition using smartphones data set. The <italic>Sitting</italic> (cyan) and <italic>Standing</italic> samples (blue) are more mixed in <xref rid="figure3" ref-type="fig">Figure 3</xref> (right) than in <xref rid="figure3" ref-type="fig">Figure 3</xref> (left). The <italic>Walking_Down_Stairs</italic> (green), <italic>Walking_Up_Stairs</italic> (orange), and <italic>Walking</italic> samples (red) are closely clustered in <xref rid="figure3" ref-type="fig">Figure 3</xref> (left), whereas some of them overlap in <xref rid="figure3" ref-type="fig">Figure 3</xref> (right).</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>t-Distributed stochastic neighbor embedding visualizations of learned features using all test samples in the human activity recognition using smartphones data set. Left: Long short-term memory with a reject option using unit-wise batch standardization results when the target coverage was 0.95. Rejected samples were not included in this figure. Right: long short-term memory model results without a reject option.</p>
          </caption>
          <graphic xlink:href="medinform_v10i3e30587_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our objective is to develop a selective prediction model using LSTM. The developed selective prediction model rejected samples using the confidence level of classifications. This selective prediction model with a reject option was trained to determine whether to obtain a classification based on targeted coverage. If the model's classification confidence was low, the model rejected the classification, and the information from the rejected samples was not used for backpropagation. As a result, the selective prediction model was trained mainly using samples that had a sufficient confidence level, which guaranteed reliability and low error rates for samples that were not rejected. To implement selective prediction for LSTM, we conducted an experiment to identify a method of normalization that could improve the performance of the selection function.</p>
        <p>In health care systems, high accuracy is important, but low false-positive and false-negative rates are also essential. To handle various time series data obtained from a health care system, we devised a selective prediction model with LSTM using an effective selection function and focused on the structure of the function. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, the output of the <italic>many-to-one</italic> LSTM includes hidden-unit information. Our goal was to deal with LSTMs that have <italic>many-to-one</italic> structures, but conventional batch normalization normalizes all batches at once. To tackle this problem, we devised UBS as a special method of normalization that attempts to normalize each hidden unit in LSTM. The false-positive and false-negative rates for each data set were meaningful. For each target coverage, the selective prediction model with UBS was superior to the model with batch normalization and the model without normalization (<xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>). These findings show that a selective function using UBS can decrease false-positive and false-negative rates. On this basis, we interpreted that the model with UBS can learn class-specific features and consider which samples to reject in the training phase.</p>
        <p>UBS also helped the model be trained based on target coverage and reduced selective risk. Using 2 public health data sets, the empirical coverage violation of the selective prediction was lower than that of the other 2 methods. The selection function with the UBS had the lowest selective risk (<xref ref-type="table" rid="table3">Table 3</xref>). The MIT-BIH arrhythmia data set results show that the coverage of the model without normalization was high regardless of the target coverage. These findings imply that the selective function without normalization did not perform as desired. We assumed that these results were based on whether the normalization methods considered hidden-unit characteristics of LSTM.</p>
        <p>Regarding the learned feature representation, the classification model with the reject option differed from existing models. In <xref rid="figure3" ref-type="fig">Figure 3</xref>, a classification model with the reject option achieved relatively better classification performance than the conventional model without the reject option because the selective prediction LSTM model did not learn the features from samples with a low confidence level. As reported in a previous study [<xref ref-type="bibr" rid="ref32">32</xref>], this suggests that representational capacity was not wasted because the model was trained mainly on samples with a high confidence level using selective prediction. Using this property, selective prediction allows humans to classify samples with low reliability and act as a second opinion in health care applications. In summary, the selective prediction model successfully classified samples based on high confidence-level features and simultaneously reduced the error rate by using the reject option.</p>
        <p>Although our research supports the possibility of generating LSTM models with selective prediction, challenges remain. First, interpretation of the visualization of the learned features is limited in this study and needs to be addressed in further studies. Second, when LSTM was used for selective prediction, it was difficult to optimize parameters that control selection functions, such as α and λ, for each data set. During the experiments, we used only 2 data sets for testing and targeted only the reject option to determine the confidence level of classifications. In future studies, efficient optimization methods should be devised and applied to various models using various data sets.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we developed LSTM classification models with a reject option to classify medical data time series. To develop the LSTM classification models with the reject option, UBS was applied. The UBS achieved superior performance (concerning coverage, risk, and false-positive and false-negative rates) compared with 2 other methods of normalization in experiments using 2 public time series data sets.</p>
        <p>If the performance in classifying nonrejected samples can be maximized by adjusting coverage or selective risks, humans can trust the output of a highly confident AI model and spend more time on other rejected samples (low confidence). The final performance (human+AI) can be maximized by appropriate automation using selective prediction.</p>
        <p>To the best of our knowledge, this is the first study demonstrating the possibility of an LSTM classification model with a reject option for time series data. Our findings may apply to various other time series data sets that require reliability.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ECG</term>
          <def>
            <p>electrocardiogram</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">MC</term>
          <def>
            <p>Monte Carlo</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MIT-BIH</term>
          <def>
            <p>Massachusetts Institute of Technology-Beth Israel Hospital</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">UBS</term>
          <def>
            <p>unit-wise batch standardization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by the Bio and Medical Technology Development Program of the National Research Foundation, which is funded by the Korean government, Ministry of Science and ICT (NRF-2017M3A9E1064781) and the Technology Innovation Program (Alchemist Project, 20012461) funded by the Korean Ministry of Trade, Industry, and Energy.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>This study was originally conceived by BRN. BRN developed a deep learning model and wrote the draft of the manuscript as the lead author. Data extraction and preprocessing were conducted by BRN and JYK. IYK and BHC jointly supervised this project as co-corresponding authors. All authors provided critical feedback and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mendez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bou Assi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sawan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence in healthcare: review and prediction case studies</article-title>
          <source>Engineering</source>
          <year>2020</year>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>291</fpage>
          <lpage>301</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eng.2019.08.015</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>DY</given-names>
            </name>
            <name name-style="western">
              <surname>Cha</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Automatic stenosis recognition from coronary angiography using convolutional neural networks</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2021</year>
          <volume>198</volume>
          <fpage>105819</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0169-2607(20)31652-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2020.105819</pub-id>
          <pub-id pub-id-type="medline">33213972</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(20)31652-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Ro</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nam</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Yook</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>IY</given-names>
            </name>
          </person-group>
          <article-title>Development of an automatic muscle atrophy measuring algorithm to calculate the ratio of supraspinatus in supraspinous fossa using deep learning</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2019</year>
          <volume>182</volume>
          <fpage>105063</fpage>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2019.105063</pub-id>
          <pub-id pub-id-type="medline">31505380</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(19)30360-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Chuah</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>Explainable deep learning based medical diagnostic system</article-title>
          <source>Smart Health</source>
          <year>2019</year>
          <volume>13</volume>
          <fpage>100068</fpage>
          <pub-id pub-id-type="doi">10.1016/j.smhl.2019.03.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Colak</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Moreland</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Five principles for the intelligent use of AI in medical imaging</article-title>
          <source>Intensive Care Med</source>
          <year>2021</year>
          <volume>47</volume>
          <issue>2</issue>
          <fpage>154</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1007/s00134-020-06316-8</pub-id>
          <pub-id pub-id-type="medline">33449134</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00134-020-06316-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>TP</given-names>
            </name>
            <name name-style="western">
              <surname>Senadeera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobs</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Coghlan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Trust and medical AI: the challenges we face and the expertise needed to overcome them</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <volume>28</volume>
          <issue>4</issue>
          <fpage>890</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33340404"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa268</pub-id>
          <pub-id pub-id-type="medline">33340404</pub-id>
          <pub-id pub-id-type="pii">6042213</pub-id>
          <pub-id pub-id-type="pmcid">PMC7973477</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hengstler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Enkel</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Duelli</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Applied artificial intelligence and trust—the case of autonomous vehicles and medical assistance devices</article-title>
          <source>Technol Forecast Soc Change</source>
          <year>2016</year>
          <volume>105</volume>
          <fpage>105</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1016/j.techfore.2015.12.014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>CK</given-names>
            </name>
          </person-group>
          <article-title>An optimum character recognition system using decision functions</article-title>
          <source>IRE Trans Electron Comput</source>
          <year>1957</year>
          <volume>EC-6</volume>
          <issue>4</issue>
          <fpage>247</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.1109/tec.1957.5222035</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for sensor-based activity recognition: a survey</article-title>
          <source>Pattern Recognit Lett</source>
          <year>2019</year>
          <volume>119</volume>
          <fpage>3</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.1016/j.patrec.2018.02.010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for healthcare: review, opportunities and challenges</article-title>
          <source>Brief Bioinform</source>
          <year>2018</year>
          <volume>19</volume>
          <issue>6</issue>
          <fpage>1236</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28481991"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbx044</pub-id>
          <pub-id pub-id-type="medline">28481991</pub-id>
          <pub-id pub-id-type="pii">3800524</pub-id>
          <pub-id pub-id-type="pmcid">PMC6455466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipton</surname>
              <given-names>ZC</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Elkan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wetzel</surname>
              <given-names>RC</given-names>
            </name>
          </person-group>
          <article-title>Learning to diagnose with LSTM recurrent neural networks</article-title>
          <source>Proceedings of the 4th International Conference on Learning Representations</source>
          <year>2016</year>
          <conf-name>ICLR '16</conf-name>
          <conf-date>May 2-4, 2016</conf-date>
          <conf-loc>San Juan, Puerto Rico</conf-loc>
          <pub-id pub-id-type="doi">10.1093/acref/9780195301731.013.43262</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Using recurrent neural network models for early detection of heart failure onset</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <volume>24</volume>
          <issue>2</issue>
          <fpage>361</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27521897"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw112</pub-id>
          <pub-id pub-id-type="medline">27521897</pub-id>
          <pub-id pub-id-type="pii">ocw112</pub-id>
          <pub-id pub-id-type="pmcid">PMC5391725</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Razavian</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Multi-task prediction of disease onsets from longitudinal laboratory tests</article-title>
          <source>Proceedings of the 1st Machine Learning for Healthcare Conference</source>
          <year>2016</year>
          <conf-name>PMLR '16</conf-name>
          <conf-date>August 19-20, 2016</conf-date>
          <conf-loc>Los Angeles, CA</conf-loc>
          <fpage>73</fpage>
          <lpage>100</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Delen</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Predicting hospital readmission for lupus patients: an RNN-LSTM-based deep-learning methodology</article-title>
          <source>Comput Biol Med</source>
          <year>2018</year>
          <volume>101</volume>
          <fpage>199</fpage>
          <lpage>209</lpage>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2018.08.029</pub-id>
          <pub-id pub-id-type="medline">30195164</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(18)30256-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Şentürk</surname>
              <given-names>Ü</given-names>
            </name>
            <name name-style="western">
              <surname>Yücedağ</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Polat</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Repetitive neural network (RNN) based blood pressure estimation using PPG and ECG signals</article-title>
          <source>2nd International Symposium on Multidisciplinary Studies and Innovative Technologies</source>
          <year>2018</year>
          <conf-name>ISMSIT '18</conf-name>
          <conf-date>October 19-21, 2018</conf-date>
          <conf-loc>Ankara, Turkey</conf-loc>
          <fpage>1</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1109/ismsit.2018.8567071</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Su</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>XR</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>YT</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Miao</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Long-term blood pressure prediction with deep recurrent neural networks</article-title>
          <source>IEEE EMBS International Conference on Biomedical &#38; Health Informatics</source>
          <year>2018</year>
          <conf-name>BHI '18</conf-name>
          <conf-date>March 4-7, 2018</conf-date>
          <conf-loc>Las Vegas, NV</conf-loc>
          <fpage>323</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1109/bhi.2018.8333434</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jeong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Interpretation of electrocardiogram (ECG) rhythm by combined CNN and BiLSTM</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>125380</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3006707</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rana</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>KK</given-names>
            </name>
          </person-group>
          <article-title>ECG heartbeat classification using a single layer LSTM model</article-title>
          <source>International SoC Design Conference</source>
          <year>2019</year>
          <conf-name>ISOCC '19</conf-name>
          <conf-date>October 6-9, 2019</conf-date>
          <conf-loc>Jeju, South Korea</conf-loc>
          <fpage>267</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1109/isocc47750.2019.9027740</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hernández</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Suárez</surname>
              <given-names>LF</given-names>
            </name>
            <name name-style="western">
              <surname>Villamizar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Altuve</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Human activity recognition on smartphones using a bidirectional LSTM network</article-title>
          <source>XXII Symposium on Image, Signal Processing and Artificial Vision</source>
          <year>2019</year>
          <conf-name>STSIVA '19</conf-name>
          <conf-date>April 24-26, 2019</conf-date>
          <conf-loc>Bucaramanga, Colombia</conf-loc>
          <fpage>1</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1109/stsiva.2019.8730249</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hammerla</surname>
              <given-names>NY</given-names>
            </name>
            <name name-style="western">
              <surname>Halloran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Plötz</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Deep, convolutional, and recurrent models for human activity recognition using wearables</article-title>
          <source>Proceedings of the 25th International Joint Conference on Artificial Intelligence</source>
          <year>2016</year>
          <conf-name>IJCAI '16</conf-name>
          <conf-date>July 9-15, 2016</conf-date>
          <conf-loc>New York, NY</conf-loc>
          <fpage>1533</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chowdhury</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Sharmin</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Robust heart rate estimation from PPG signals with intense motion artifacts using cascade of adaptive filter and recurrent neural network</article-title>
          <source>2019 IEEE Region 10 Conference</source>
          <year>2019</year>
          <conf-name>TENCON '19</conf-name>
          <conf-date>October 17-20, 2019</conf-date>
          <conf-loc>Kochi, India</conf-loc>
          <fpage>1952</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1109/tencon.2019.8929692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chevalier</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Deep residual Bidir-LSTM for human activity recognition using wearable sensors</article-title>
          <source>Math Probl Eng</source>
          <year>2018</year>
          <volume>2018</volume>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1155/2018/7316954</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cordella</surname>
              <given-names>LP</given-names>
            </name>
            <name name-style="western">
              <surname>De Stefano</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tortorella</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Vento</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A method for improving classification reliability of multilayer perceptrons</article-title>
          <source>IEEE Trans Neural Netw</source>
          <year>1995</year>
          <volume>6</volume>
          <issue>5</issue>
          <fpage>1140</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1109/72.410358</pub-id>
          <pub-id pub-id-type="medline">18263404</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Stefano</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sansone</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vento</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>To reject or not to reject: that is the question-an answer in case of neural classifiers</article-title>
          <source>IEEE Trans Syst, Man, Cybern C</source>
          <year>2000</year>
          <volume>30</volume>
          <issue>1</issue>
          <fpage>84</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1109/5326.827457</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El-Yaniv</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wiener</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Pointwise tracking the optimal regression function</article-title>
          <source>Advances in Neural Information Processing Systems 25</source>
          <year>2012</year>
          <conf-name>NIPS '12</conf-name>
          <conf-date>December 3-8, 2012</conf-date>
          <conf-loc>Lake Tahoe, NV</conf-loc>
          <fpage>2042</fpage>
          <lpage>50</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Geifman</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>El-Yaniv</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Selective classification for deep neural networks</article-title>
          <source>Proceedings of the 31st International Conference on Neural Information Processing Systems</source>
          <year>2017</year>
          <conf-name>NIPS '17</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA</conf-loc>
          <fpage>4885</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.7551/mitpress/11474.003.0014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blundell</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cornebise</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kavukcuoglu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wierstra</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Weight uncertainty in neural network</article-title>
          <source>Proceedings of The 32nd International Conference on Machine Learning</source>
          <year>2015</year>
          <conf-name>ICML '15</conf-name>
          <conf-date>July 6-11, 2015</conf-date>
          <conf-loc>Lille, France</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipton</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>BBQ-networks: efficient exploration in deep reinforcement learning for task-oriented dialogue systems</article-title>
          <source>Proceedings of the 32nd AAAI Conference on Artificial Intelligence</source>
          <year>2018</year>
          <conf-name>AAAI '18</conf-name>
          <conf-date>February 2-7, 2018</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Houthooft</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Schulman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>De Turck</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Abbeel</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>VIME: variational information maximizing exploration</article-title>
          <source>Proceedings of the 30th International Conference on Neural Information Processing Systems</source>
          <year>2016</year>
          <conf-name>NIPS '16</conf-name>
          <conf-date>December 5-10, 2016</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>1117</fpage>
          <lpage>25</lpage>
          <!-- NOTE(review): removed erroneous DOI 10.1016/S0377-0427(00)00433-7 — it belongs to ref35 (Potra &amp; Wright, "Interior-point methods", J Comput Appl Math), not to this NIPS '16 proceedings paper -->
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fortunato</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Blundell</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vinyals</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Bayesian recurrent neural networks</article-title>
          <source>arXiv (forthcoming)</source>
          <year>2017</year>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1704.02798.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>DeSalvo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mohri</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Learning with rejection</article-title>
          <source>Proceedings of the 27th International Conference on Algorithmic Learning Theory</source>
          <year>2016</year>
          <conf-name>ALT '16</conf-name>
          <conf-date>October 19-21, 2016</conf-date>
          <conf-loc>Bari, Italy</conf-loc>
          <fpage>67</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-46379-7_5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Geifman</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>El-Yaniv</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>SelectiveNet: a deep neural network with an integrated reject option</article-title>
          <source>Proceedings of The 36th International Conference on Machine Learning</source>
          <year>2019</year>
          <conf-name>ICML '19</conf-name>
          <conf-date>June 10-15, 2019</conf-date>
          <conf-loc>Long Beach, CA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El-Yaniv</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wiener</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>On the foundations of noise-free selective classification</article-title>
          <source>J Mach Learn Res</source>
          <year>2010</year>
          <volume>11</volume>
          <issue>53</issue>
          <fpage>1605</fpage>
          <lpage>41</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Potra</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Interior-point methods</article-title>
          <source>J Comput Appl Math</source>
          <year>2000</year>
          <volume>124</volume>
          <issue>1-2</issue>
          <fpage>281</fpage>
          <lpage>302</lpage>
          <pub-id pub-id-type="doi">10.1016/s0377-0427(00)00433-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ioffe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Szegedy</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Batch normalization: accelerating deep network training by reducing internal covariate shift</article-title>
          <source>Proceedings of the 32nd International Conference on Machine Learning</source>
          <year>2015</year>
          <conf-name>ICML '15</conf-name>
          <conf-date>July 6-11, 2015</conf-date>
          <conf-loc>Lille, France</conf-loc>
          <fpage>448</fpage>
          <lpage>56</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Group normalization</article-title>
          <source>Proceedings of the 15th European Conference on Computer Vision</source>
          <year>2018</year>
          <conf-name>ECCV '18</conf-name>
          <conf-date>September 8-14, 2018</conf-date>
          <conf-loc>Munich, Germany</conf-loc>
          <fpage>3</fpage>
          <lpage>19</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-01261-8_1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bulbul</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cetin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dogru</surname>
              <given-names>IA</given-names>
            </name>
          </person-group>
          <article-title>Human activity recognition using smartphones</article-title>
          <source>Proceedings of the 2nd International Symposium on Multidisciplinary Studies and Innovative Technologies</source>
          <year>2018</year>
          <conf-name>ISMSIT '18</conf-name>
          <conf-date>October 19-21, 2018</conf-date>
          <conf-loc>Ankara, Turkey</conf-loc>
          <fpage>1</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1109/ismsit.2018.8567275</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Deep learning models for real-time human activity recognition with smartphones</article-title>
          <source>Mobile Netw Appl</source>
          <year>2019</year>
          <volume>25</volume>
          <issue>2</issue>
          <fpage>743</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1007/s11036-019-01445-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>LSTM-CNN architecture for human activity recognition</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>56855</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.2982225</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>ECG-signal classification using SVM with multi-feature</article-title>
          <source>The 8th IEEE International Symposium on Next-Generation Electronics</source>
          <year>2019</year>
          <conf-name>ISNE '19</conf-name>
          <conf-date>October 9-10, 2019</conf-date>
          <conf-loc>Zhengzhou, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/isne.2019.8896430</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Desai</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Martis</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Nayak</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Sarika</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Seshikala</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Machine intelligent diagnosis of ECG for arrhythmia classification using DWT, ICA and SVM techniques</article-title>
          <source>2015 Annual IEEE India Conference</source>
          <year>2015</year>
          <conf-name>INDICON '15</conf-name>
          <conf-date>December 17-20, 2015</conf-date>
          <conf-loc>New Delhi, India</conf-loc>
          <pub-id pub-id-type="doi">10.1109/indicon.2015.7443220</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anguita</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ghio</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oneto</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Parra</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Reyes-Ortiz</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>A public domain dataset for human activity recognition using smartphones</article-title>
          <source>Proceedings of 2013 European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning</source>
          <year>2013</year>
          <conf-name>ESANN '13</conf-name>
          <conf-date>April 24-26, 2013</conf-date>
          <conf-loc>Bruges, Belgium</conf-loc>
          <fpage>437</fpage>
          <lpage>42</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>The impact of the MIT-BIH arrhythmia database</article-title>
          <source>IEEE Eng Med Biol Mag</source>
          <year>2001</year>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>45</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1109/51.932724</pub-id>
          <pub-id pub-id-type="medline">11446209</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldberger</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Amaral</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Glass</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hausdorff</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Ivanov</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Mietus</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Stanley</surname>
              <given-names>HE</given-names>
            </name>
          </person-group>
          <article-title>PhysioBank, PhysioToolkit, and PhysioNet: components of a new research resource for complex physiologic signals</article-title>
          <source>Circulation</source>
          <year>2000</year>
          <volume>101</volume>
          <issue>23</issue>
          <fpage>E215</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1161/01.cir.101.23.e215</pub-id>
          <pub-id pub-id-type="medline">10851218</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kachuee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fazeli</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sarrafzadeh</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ECG heartbeat classification: a deep transferable representation</article-title>
          <source>2018 IEEE International Conference on Healthcare Informatics</source>
          <year>2018</year>
          <conf-name>ICHI '18</conf-name>
          <conf-date>June 4-7, 2018</conf-date>
          <conf-loc>New York, NY</conf-loc>
          <fpage>443</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1109/ichi.2018.00092</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tung</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sigal</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Interpretable spatio-temporal attention for video action recognition</article-title>
          <source>2019 IEEE/CVF International Conference on Computer Vision Workshop</source>
          <year>2019</year>
          <conf-name>ICCVW '19</conf-name>
          <conf-date>October 27-28, 2019</conf-date>
          <conf-loc>Seoul, South Korea</conf-loc>
          <fpage>1513</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1109/iccvw.2019.00189</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Anwar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Saleh</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>UC-Net: uncertainty inspired RGB-D saliency detection via conditional variational autoencoders</article-title>
          <source>2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>
          <year>2020</year>
          <conf-name>CVPR '20</conf-name>
          <conf-date>June 13-19, 2020</conf-date>
          <conf-loc>Seattle, WA</conf-loc>
          <fpage>8579</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.1109/cvpr42600.2020.00861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van der Maaten</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Visualizing data using t-SNE</article-title>
          <source>J Mach Learn Res</source>
          <year>2008</year>
          <volume>9</volume>
          <issue>86</issue>
          <fpage>2579</fpage>
          <lpage>605</lpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
