<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i11e26914</article-id>
      <article-id pub-id-type="pmid">34747711</article-id>
      <article-id pub-id-type="doi">10.2196/26914</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Local Differential Privacy in the Medical Domain to Protect Sensitive Information: Algorithm Development and Real-World Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Vilaplana</surname>
            <given-names>Jordi</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Sung</surname>
            <given-names>MinDong</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5217-8877</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Cha</surname>
            <given-names>Dongchul</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0043-5026</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Park</surname>
            <given-names>Yu Rang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Systems Informatics</institution>
            <institution>Yonsei University College of Medicine</institution>
            <addr-line>Yonsei-ro 50-1</addr-line>
            <addr-line>Seoul, 03722</addr-line>
            <country>Republic of Korea</country>
            <fax>82 2 227 8354</fax>
            <phone>82 2 228 2363</phone>
            <email>yurangpark@yuhs.ac</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4210-2094</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Systems Informatics</institution>
        <institution>Yonsei University College of Medicine</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Otorhinolaryngology</institution>
        <institution>Yonsei University College of Medicine</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Yu Rang Park <email>yurangpark@yuhs.ac</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>8</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>11</issue>
      <elocation-id>e26914</elocation-id>
      <history>
        <date date-type="received">
          <day>4</day>
          <month>1</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>28</day>
          <month>1</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>10</day>
          <month>2</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>6</day>
          <month>9</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©MinDong Sung, Dongchul Cha, Yu Rang Park. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 08.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/11/e26914" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Privacy is of increasing interest in the present big data era, particularly the privacy of medical data. Specifically, differential privacy has emerged as the standard method for preservation of privacy during data analysis and publishing.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Using machine learning techniques, we applied differential privacy to medical data with diverse parameters and checked the feasibility of our algorithms with synthetic data as well as the balance between data privacy and utility.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>All data were normalized to a range between –1 and 1, and the bounded Laplacian method was applied to prevent the generation of out-of-bound values after applying the differential privacy algorithm. To preserve the cardinality of the categorical variables, we performed postprocessing via discretization. The algorithm was evaluated using both synthetic and real-world data (from the eICU Collaborative Research Database). We evaluated the difference between the original data and the perturbated data using misclassification rates and the mean squared error for categorical data and continuous data, respectively. Further, we compared the performance of classification models that predict in-hospital mortality using real-world data.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The misclassification rate of categorical variables ranged between 0.49 and 0.85 when the value of ε was 0.1, and it converged to 0 as ε increased. When ε was between 10<sup>2</sup> and 10<sup>3</sup>, the misclassification rate rapidly dropped to 0. Similarly, the mean squared error of the continuous variables decreased as ε increased. The performance of the model developed from perturbed data converged to that of the model developed from original data as ε increased. In particular, the accuracy of a random forest model developed from the original data was 0.801, and this value ranged from 0.757 to 0.81 when ε was 10<sup>-1</sup> and 10<sup>4</sup>, respectively.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We applied local differential privacy to medical domain data, which are diverse and high dimensional. Higher noise may offer enhanced privacy, but it simultaneously hinders utility. We should choose an appropriate degree of noise for data perturbation to balance privacy and utility depending on specific situations.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>privacy-preserving</kwd>
        <kwd>differential privacy</kwd>
        <kwd>medical informatics</kwd>
        <kwd>medical data</kwd>
        <kwd>privacy</kwd>
        <kwd>electronic health record</kwd>
        <kwd>algorithm</kwd>
        <kwd>development</kwd>
        <kwd>validation</kwd>
        <kwd>big data</kwd>
        <kwd>medical data</kwd>
        <kwd>feasibility</kwd>
        <kwd>machine learning</kwd>
        <kwd>synthetic data</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Big data is a core factor in the renovation of medicine. The raw data have low utility; however, applying algorithms such as machine learning (ML) enables us to make the most of these data [<xref ref-type="bibr" rid="ref1">1</xref>]. Unlike rule-based systems, ML algorithms are data driven and require a large amount of data. Particularly, conventional ML approaches require centralized data for learning. To obtain this substantial amount of data, it is necessary to exchange data among different organizations to develop an effective ML model.</p>
      <p>However, the exchange of data between different parties causes privacy problems, and there are increasing concerns about privacy violations by large companies [<xref ref-type="bibr" rid="ref2">2</xref>]. Medical data that mostly contain sensitive information should be appropriately protected when shared with third parties. The European Union’s General Data Protection Regulation [<xref ref-type="bibr" rid="ref3">3</xref>] and the United States’ Health Insurance Portability and Accountability Act of 1996 (HIPAA) [<xref ref-type="bibr" rid="ref4">4</xref>] recognize this problem and require users’ privacy to be strengthened. Medical data have various distinct properties in addition to their sensitive attributes. For example, serum glucose levels are continuous, whereas medical histories are usually recorded using categorical values. Medical data also contain multimodal values: some of the data may be obtained from blood tests, whereas others may originate from radiologic and physical examination tests.</p>
      <p>Deidentification is defined as “the removal or replacement of personal identifiers so that it would be difficult to reestablish a link between the individual and his or her data [<xref ref-type="bibr" rid="ref5">5</xref>].” In particular, under the HIPAA, data are considered deidentified when specified data elements are removed [<xref ref-type="bibr" rid="ref4">4</xref>]. Anonymization is defined as “the irreversible removal of the link between the individual and his or her medical record data to the degree that it would be virtually impossible to reestablish the link [<xref ref-type="bibr" rid="ref5">5</xref>].” In such a case, the anonymized data could never be reidentified using the data in the underlying data sets. There are three primary ways to anonymize these data: suppression, generalization, and noise addition [<xref ref-type="bibr" rid="ref6">6</xref>]. Deidentified data may not necessarily be anonymized. That is, anonymization is a subset of deidentification. Following anonymization, three main measures to identify the privacy risk can be evaluated: <italic>k</italic>-anonymity [<xref ref-type="bibr" rid="ref7">7</xref>], <italic>l</italic>-diversity [<xref ref-type="bibr" rid="ref8">8</xref>], and <italic>t</italic>-closeness [<xref ref-type="bibr" rid="ref9">9</xref>]. Deidentification tools, such as ARX [<xref ref-type="bibr" rid="ref10">10</xref>], offer seamless privacy protection through feature generalization and the suppression of records.</p>
      <p>Differential privacy [<xref ref-type="bibr" rid="ref11">11</xref>], which entails a semantic model, is another data privacy approach. Compared to syntactic anonymity, it requires less domain knowledge and is inherently robust to linkage attacks combined with domain knowledge. Moreover, differential privacy is considered to be a de facto standard for private data analysis or publishing [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Technology companies such as Apple and Google have attempted to apply differential privacy to protect the privacy of mobile data [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Moreover, the rapid development of the Internet of Things (IoT) raises privacy risks that must be considered [<xref ref-type="bibr" rid="ref16">16</xref>]. Researchers have been actively applying differential privacy to the IoT, such as self-driving cars [<xref ref-type="bibr" rid="ref17">17</xref>] and sensors [<xref ref-type="bibr" rid="ref16">16</xref>]. In ML, personal information can be leaked. Applying differential privacy to the deep learning model can overcome this threat [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], and the health care domain is no exception. Several studies have been performed in the health care domain. For example, Kim et al [<xref ref-type="bibr" rid="ref20">20</xref>] introduced a local differential privacy algorithm for health data streams. Also, Suriyakumar et al [<xref ref-type="bibr" rid="ref21">21</xref>] investigated the feasibility of differentially private stochastic gradient descent in a health care setting with the influential function. Most studies focus on data sets that have only a few features and on differential privacy in deep learning models.</p>
      <p>In this study, we focused on local differential privacy with regard to multivariate medical data. We applied differential privacy with diverse parameters and checked (1) the feasibility of training our algorithms with synthetic data and (2) the balance between data privacy and utility with regard to ML techniques.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p><xref rid="figure1" ref-type="fig">Figure 1</xref> presents the workflow employed to achieve differential privacy in this study. When a user requests data, we perturb the data using the bounded Laplacian method (<inline-graphic xlink:href="medinform_v9i11e26914_fig5.png" xlink:type="simple" mimetype="image"/>) and discretization postprocessing (<inline-graphic xlink:href="medinform_v9i11e26914_fig6.png" xlink:type="simple" mimetype="image"/>) to provide high-fidelity data while preserving the privacy of the original data.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Differential privacy upon data request from third-party users. The owner perturbs the original data to preserve privacy before sending the data externally. The third-party user can be either a curator or the final user. <inline-graphic xlink:href="medinform_v9i11e26914_fig5.png" xlink:type="simple" mimetype="image"/>: bounded Laplacian method; <inline-graphic xlink:href="medinform_v9i11e26914_fig6.png" xlink:type="simple" mimetype="image"/>: discretization postprocessing.</p>
        </caption>
        <graphic xlink:href="medinform_v9i11e26914_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <sec>
        <title>The Value of ε for Local Differential Privacy</title>
        <p>Dwork et al [<xref ref-type="bibr" rid="ref22">22</xref>] defined <italic>ε</italic>-differential privacy in terms of a randomized function. For adjacent data Y<sub>1</sub> and Y<sub>2</sub>, function κ is (<italic>ε</italic>, <italic>δ</italic>)–differentially private if</p>
        <p>
          <disp-formula>P[κ(Y<sub>1</sub>) ∈ S] ≤ e<sup>ε</sup> ∙ P[κ(Y<sub>2</sub>) ∈ S] + δ</disp-formula>
        </p>
        <p>where <italic>S ⊂ Range</italic>(κ). Local differential privacy is a specific case in which the random function or perturbation is applied by data owners, not by central aggregators.</p>
      </sec>
      <sec>
        <title>Bounded Laplacian Method</title>
        <p>Before applying local differential privacy, all variables were normalized to a range between –1 and 1. First, we applied the bounded Laplacian method. Because a conventional Laplacian distribution yields an infinite boundary, it entails some limitations when applied to clinical domains. For example, respiratory rates, which are supposed to be a positive number, may become negative after applying the conventional Laplacian method, which is illogical. There are two methods to overcome this problem: the truncation method and the bound method [<xref ref-type="bibr" rid="ref23">23</xref>]. We focused on the latter to minimize the probability of data manipulation because changes in data in the medical domain may have a considerable impact on the desired outputs.</p>
        <p>We used the bounded Laplacian function proposed by Holohan et al [<xref ref-type="bibr" rid="ref23">23</xref>], assuming that the input variable is within the output domain. Given <italic>b</italic> &#62; 0, <italic>W<sub>q</sub></italic>: <italic>Ω</italic> → <italic>D</italic>, for each <italic>q</italic> ∈ <italic>D</italic>, we defined the probability density function <inline-graphic xlink:href="medinform_v9i11e26914_fig7.png" xlink:type="simple" mimetype="image"/> as:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i11e26914_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>where</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i11e26914_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>We set <italic>δ</italic>=0, <italic>l</italic> (lower bound) as –1, <italic>u</italic> (upper bound) as 1, and ∆<italic>Q</italic> as 2 in our experiments and adjusted <italic>ε</italic> to measure the effect of the privacy changes.</p>
      </sec>
      <sec>
        <title>Discretization Postprocessing for Discrete Variables</title>
        <p>Because we applied the bounded Laplacian method to perturb the given data to a range between –1 and 1 in a continuous manner, there are infinite possibilities for a given input. Many medical domain variables are categorical (either ordinal or nominal), such as medicosurgical histories. Therefore, following the application of the bounded Laplacian method, additional postprocessing was performed for categorical variables. We distributed the intermediate output of the given data over the Bernoulli distribution, similar to the method proposed by Yang et al [<xref ref-type="bibr" rid="ref17">17</xref>]. The perturbed data <italic>y</italic> ∈ [–<italic>C</italic>,<italic>C</italic>] were separated into m pieces, where m is the cardinality of the original input variable (a positive integer). We first shifted the range [–<italic>C</italic>,<italic>C</italic>] to [0, <italic>m</italic>] by equally dividing the space, which resulted in <inline-graphic xlink:href="medinform_v9i11e26914_fig10.png" xlink:type="simple" mimetype="image"/> intervals. Therefore, for given perturbed data y, we obtain the following:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i11e26914_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>After calculating k, the Bernoulli probability <italic>p</italic> was sampled such that</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i11e26914_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>which is the distance between two adjacent possibilities. Finally, we discretized the perturbed data <italic>y</italic> concerning the Bernoulli probability <italic>p</italic> such that</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i11e26914_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>where <inline-graphic xlink:href="medinform_v9i11e26914_fig14.png" xlink:type="simple" mimetype="image"/> denotes the Bernoulli distribution function.</p>
      </sec>
      <sec>
        <title>Data Set for Validation</title>
        <p>We used simulated (randomly generated) data for initial validation to ensure that the bounded Laplacian method functions as expected. To simulate real-world use, we used the eICU Collaborative Research Database [<xref ref-type="bibr" rid="ref24">24</xref>]. First, to evaluate the extent to which the proposed differential privacy algorithms effectively perturbed the given original data, we used the misclassification rate for categorical variables and mean squared error (MSE) for continuous variables when measuring the similarity between two data sets. Second, to evaluate the adverse effect of differential privacy on the utility of the data set, we compared the accuracy of predicting the mortality rate following intensive care unit admission using Acute Physiology and Chronic Health Evaluation (APACHE) [<xref ref-type="bibr" rid="ref25">25</xref>] scoring variables under various ε values. The data set contained intubated, ventilation, dialysis, medication status (cardinality: 2), eyes (cardinality: 4), verbal (cardinality: 5), and motor status (cardinality: 6) as categorical variables. Urine output, temperature, respiratory rate, sodium, heart rate, mean blood pressure, pH, hematocrit, creatinine, albumin, oxygen pressure, CO<sub>2</sub> pressure, blood urea nitrogen, glucose, bilirubin, and fraction of inspired oxygen (FiO<sub>2</sub>) values were considered continuous variables. There were initially 148,532 patients (rows) in the data set, but after the deletion of missing values, the data set contained a total of 4740 patients (3597 who were alive and 1143 who had died). The following ML methods were used for mortality prediction: decision tree, K-nearest neighbor, support vector machine, logistic regression, naïve Bayes, and random forest. The data were divided into training and test sets in a ratio of 80:20. All predictions were averaged using a 5-fold cross-validation method, and the scikit-learn [<xref ref-type="bibr" rid="ref26">26</xref>] library was used with the Python programming language.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Synthetic Data for Validation of the Bounded Laplacian Function</title>
        <p>We created an equally spaced distribution, ranging between –1 and 1, and applied the bounded Laplacian method. In contrast to the conventional Laplacian method, which has an infinite range, the bounded method entailed a range of –1 to 1.</p>
        <p>After confirming that the bounded Laplacian method works as intended, we then created synthetic continuous data that range from –1 to 1 and applied the conventional Laplacian method and bounded Laplacian method with <italic>ε</italic>=0.1, <italic>δ</italic>=0 (<xref rid="figure2" ref-type="fig">Figure 2</xref>A). The original Laplacian method had out-of-range occurrences that were not present in the bounded Laplacian method. To test the categorical data and postdiscretization processing, we created a set of 100 random integers ranging from 0 to 9, then normalized them to range from –1 to 1. The original Laplacian method had some occurrences that were out of bounds. In the categorical data, the bounded Laplacian method stayed within the data range, as in the continuous data. However, some of the categorical values were not initially present in the given data (<xref rid="figure2" ref-type="fig">Figure 2</xref>B), which is similar to the out-of-bounds condition. Therefore, additional postprocessing discretization was performed, and the algorithm showed that the discretization technique ensures that there are no nonexistent values in the categorical data (<xref rid="figure2" ref-type="fig">Figure 2</xref>C).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Comparison of conventional and bounded Laplacian methods using synthetic data. (A) Histogram of randomly generated continuous data ranging from –1 to 1. (B) Histogram of randomly generated categorical data, which originally ranged from 0 to 9 and were then normalized to range from –1 to 1. (C) Histogram obtained after application of discretization postprocessing to the data in (B). In all scenarios, the Laplacian method was applied with <italic>ε</italic>=0.1, <italic>δ</italic>=0.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e26914_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Validation Using Real-World Data</title>
        <p>The eICU Collaborative Research Database [<xref ref-type="bibr" rid="ref24">24</xref>] was used for validation. We used MSEs and misclassification rates as metrics for continuous and categorical variables, respectively, to calculate the differences between the original and perturbed data. Because of the variance between values in the original data, the MSE of continuous variables varies extensively in the case of eICU data. For example, pH and albumin are similar among different individuals, whereas heart rate and glucose have substantial differences (<xref rid="figure3" ref-type="fig">Figure 3</xref>A). Regarding the categorical variables, intubated, ventilation, and dialysis status are either 0 or 1, and the chance level is 0.5. The value for “eye” ranges from 1 to 4, that for “verbal” ranges from 1 to 5, and that for “motor” ranges from 1 to 6. Therefore, there were differences in the misclassification rates, especially when <italic>ε</italic> was small (<xref rid="figure3" ref-type="fig">Figure 3</xref>B). As <italic>ε</italic> increased, all perturbed values approached their original values for both continuous and categorical variables (<xref rid="figure3" ref-type="fig">Figures 3</xref>A and 3B).</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>ε values and degrees of data perturbation for (A) continuous variables and (B) categorical variables. bun: blood urea nitrogen; fio2: fraction of inspired oxygen; meanbp: mean blood pressure; pao2: partial pressure of oxygen, arterial; pco2: partial pressure of carbon dioxide; wbc: white blood cells.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e26914_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>To simulate data utility with respect to <italic>ε</italic>, we constructed a predictive classifier to predict mortality using the eICU data set. Note that 3597 of the 4740 patients (75.9%) were alive, yielding a chance level of 76%. A lower value of <italic>ε</italic> caused severe data perturbation, resulting in an accuracy that was near the chance level. Increasing the value of <italic>ε</italic> increased the performance of the classifiers, and the performance converged to the accuracy obtained using the original data (shown as dashed lines in <xref rid="figure4" ref-type="fig">Figure 4</xref>). This tendency was consistent among the different models, and the random forest model was the top performer.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Classification accuracies among different machine learning models with respect to ε. The performance of the models developed using original data is marked with dashed lines. SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="medinform_v9i11e26914_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we developed and validated a local differential privacy method for the medical domain. We used the bounded Laplacian method to overcome the out-of-bounds problem. In addition, we used discretization postprocessing for the categorical variables to address nonexistent categorical variables following perturbation.</p>
        <p>Various approaches and metrics are employed when publishing microdata publicly. <italic>k</italic>-anonymity [<xref ref-type="bibr" rid="ref7">7</xref>] is a metric that requires each cluster (or set of persons in medical data) to have at least <italic>k</italic> records so that there are at least <italic>k</italic> – 1 individuals that are indistinguishable. However, this metric is susceptible to reidentification through linkage attacks and applications of background knowledge. <italic>l</italic>-diversity was introduced to overcome these limitations; it requires each equivalent block containing sensitive information to have at least <italic>l</italic> appropriately represented values. This method is still vulnerable to skewness and similarity attacks [<xref ref-type="bibr" rid="ref9">9</xref>]. <italic>t</italic>-closeness [<xref ref-type="bibr" rid="ref9">9</xref>] mitigates this issue by requiring an equivalence class to have a distance of less than <italic>t</italic> (the earth mover distance) between the distribution of a sensitive attribute and that of the overall data. However, using the earth mover distance makes it difficult to identify the closeness between <italic>t</italic> and the gained knowledge. In addition, in this approach, the distribution of sensitive attributes in the equivalence class must be similar to that in the entire data set.</p>
        <p>In contrast to these privacy metrics and methods, <italic>ε</italic>-differential privacy retains the structure of the data while adding noise to prevent leakage of the original data (<xref rid="figure2" ref-type="fig">Figure 2</xref>). There are two main differential privacy schemas: global and local. Global differential privacy requires the database owner to trust a curator that performs data perturbation before sending the data to the requested user. Our implementation, local differential privacy, assumes the worst-case scenario by considering an untrusted curator. The leakage of a medical data set may have critical consequences because such a data set may contain sensitive information, such as disease data, medical history, and insurance status. Therefore, our method minimizes the risk of data leaks by not trusting anyone outside the network.</p>
        <p>Medical domain data are, by nature, multidimensional and multimodal. <italic>k</italic>-anonymity may suffer from severe utility loss if applied to high-dimensional data [<xref ref-type="bibr" rid="ref27">27</xref>]. <italic>ε</italic>-differential privacy also suffered from severe utility loss under a low <italic>ε</italic>, which was apparent from the low classification accuracy in predicting the mortality rate (<xref rid="figure4" ref-type="fig">Figure 4</xref>). Despite the fact that the given data set was multidimensional and multimodal, adjusting the value of <italic>ε</italic> affected all variables uniformly regardless of their data type.</p>
        <p>Differential privacy usually has stronger tradeoffs between data utility, which we mainly focused on, and privacy [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. There were high variances between variables with regard to the MSEs and misclassification rates when <italic>ε</italic> was low (<xref rid="figure3" ref-type="fig">Figure 3</xref>). As <italic>ε</italic> increased, all variables approached their actual values, enabling better utility at the cost of privacy; this is apparent from the accuracy of prediction shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. When publishing synthetically perturbed data with <italic>ε</italic>-differential privacy, we may consider providing the <italic>ε</italic> value along with the data. This additional information may provide users with insights into the degree of data perturbation.</p>
        <p>According to the results, for our data set, we may heuristically choose an <italic>ε</italic> value between 10<sup>3</sup> and 10<sup>4</sup> and apply differential privacy methods to send the perturbed data upon the user’s request. The optimal value of <italic>ε</italic> varies among different data sets and utility requirements, and choosing this value is beyond the scope of this study.</p>
        <p>A limitation of this study is that we only applied our algorithms to synthetic data, and we validated the algorithms on only one data set. However, it is likely that other data sets can also be directly employed because we used a relatively small amount of prior data knowledge in our algorithm. In addition, we excluded rows that contained null values in the database. Because medical data are high-dimensional and sparse, future studies should be conducted to address null values. The distributions of data sets affect the normalization and the perturbation process. It is better to share distribution information, such as the minimum and maximum values of each column, with each institute. The model would be developed from perturbed data, which can be less accurate than a model based on original data. The optimal <italic>ε</italic> value, which determines the degree of perturbation, should be set when applying the algorithm. In this study, a value of <italic>ε</italic> between 10<sup>3</sup> and 10<sup>4</sup> seemed heuristically appropriate; this depends on which data or model is used.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>We applied local differential privacy to medical domain data, which is diverse and high-dimensional. Applying bounded Laplacian noise with discretization postprocessing ensures that no out-of-bound data are present. Higher noise may offer enhanced privacy, but it simultaneously hinders utility. Thus, choosing an appropriate degree of noise for data perturbation entails a privacy-utility tradeoff, and one should choose such parameters depending on specific situations.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">APACHE</term>
          <def>
            <p>Acute Physiology and Chronic Health Evaluation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">FiO2</term>
          <def>
            <p>fraction of inspired oxygen</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">HIPAA</term>
          <def>
            <p>Health Insurance Portability and Accountability Act</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">IoT</term>
          <def>
            <p>Internet of Things</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MSE</term>
          <def>
            <p>mean squared error</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by a grant of the Korea Health Technology R&#38;D Project and the MD-PhD/Medical Scientist Training Program through the Korea Health Industry Development Institute, funded by the Ministry of Health &#38; Welfare, Republic of Korea (KHIDIHI19C1015010020, HI21C0974).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obermeyer</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Emanuel</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Predicting the future - big data, machine learning, and clinical medicine</article-title>
          <source>N Engl J Med</source>
          <year>2016</year>
          <month>09</month>
          <day>29</day>
          <volume>375</volume>
          <issue>13</issue>
          <fpage>1216</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27682033"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMp1606181</pub-id>
          <pub-id pub-id-type="medline">27682033</pub-id>
          <pub-id pub-id-type="pmcid">PMC5070532</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Federated machine learning</article-title>
          <source>ACM Trans Intell Syst Technol</source>
          <year>2019</year>
          <month>02</month>
          <day>28</day>
          <volume>10</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>19</lpage>
          <pub-id pub-id-type="doi">10.1145/3298981</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Voigt</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>von dem Bussche</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>The EU General Data Protection Regulation (GDPR): A Practical Guide</source>
          <year>2017</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <article-title>Fact Sheet: The Health Insurance Portability and Accountability Act (HIPAA)</article-title>
          <source>US Department of Labor</source>
          <year>2004</year>
          <month>12</month>
          <access-date>2021-09-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://purl.fdlp.gov/GPO/gpo10291">http://purl.fdlp.gov/GPO/gpo10291</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chevrier</surname>
              <given-names>Raphaël</given-names>
            </name>
            <name name-style="western">
              <surname>Foufi</surname>
              <given-names>Vasiliki</given-names>
            </name>
            <name name-style="western">
              <surname>Gaudet-Blavignac</surname>
              <given-names>Christophe</given-names>
            </name>
            <name name-style="western">
              <surname>Robert</surname>
              <given-names>Arnaud</given-names>
            </name>
            <name name-style="western">
              <surname>Lovis</surname>
              <given-names>Christian</given-names>
            </name>
          </person-group>
          <article-title>Use and understanding of anonymization and de-identification in the biomedical literature: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>05</month>
          <day>31</day>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>e13484</fpage>
          <lpage>570</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/5/e13484/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13484</pub-id>
          <pub-id pub-id-type="medline">31152528</pub-id>
          <pub-id pub-id-type="pii">v21i5e13484</pub-id>
          <pub-id pub-id-type="pmcid">PMC6658290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <article-title>Anonymization</article-title>
          <source>International Association of Privacy Professionals</source>
          <access-date>2021-09-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://iapp.org/resources/article/anonymization/">https://iapp.org/resources/article/anonymization/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sweeney</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>k-Anonymity: a model for protecting privacy</article-title>
          <source>Int J Unc Fuzz Knowl Based Syst</source>
          <year>2012</year>
          <month>05</month>
          <day>02</day>
          <volume>10</volume>
          <issue>05</issue>
          <fpage>557</fpage>
          <lpage>570</lpage>
          <pub-id pub-id-type="doi">10.1142/S0218488502001648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Machanavajjhala</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kifer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gehrke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Venkitasubramaniam</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>L-diversity: privacy beyond k-anonymity</article-title>
          <source>ACM Trans Knowl Discov Data</source>
          <year>2007</year>
          <month>03</month>
          <day>01</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>3</fpage>
          <lpage>es</lpage>
          <pub-id pub-id-type="doi">10.1145/1217299.1217302</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatasubramanian</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>t-Closeness: privacy beyond k-anonymity and l-diversity</article-title>
          <year>2007</year>
          <month>6</month>
          <day>4</day>
          <conf-name>2007 IEEE 23rd International Conference on Data Engineering</conf-name>
          <conf-date>April 15-20, 2007</conf-date>
          <conf-loc>Istanbul, Turkey</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icde.2007.367856</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prasser</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Eicher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Spengler</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bild</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>KA</given-names>
            </name>
          </person-group>
          <article-title>Flexible data anonymization using ARX—current status and challenges ahead</article-title>
          <source>Softw: Pract Exper</source>
          <year>2020</year>
          <month>02</month>
          <day>25</day>
          <volume>50</volume>
          <issue>7</issue>
          <fpage>1277</fpage>
          <lpage>1304</lpage>
          <pub-id pub-id-type="doi">10.1002/spe.2812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dwork</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kenthapadi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>McSherry</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mironov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Naor</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Our data, ourselves: privacy via distributed noise generation</article-title>
          <source>Advances in Cryptology - EUROCRYPT 2006</source>
          <year>2006</year>
          <conf-name>EUROCRYPT 2006: Annual International Conference on the Theory and Applications of Cryptographic Techniques</conf-name>
          <conf-date>May 28-June 1, 2006</conf-date>
          <conf-loc>Saint Petersburg, Russia</conf-loc>
          <fpage>486</fpage>
          <pub-id pub-id-type="doi">10.1007/11761679_29</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barthe</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chadha</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jagannath</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Sistla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Viswanathan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Deciding differential privacy for programs with finite inputs and outputs</article-title>
          <year>2020</year>
          <month>7</month>
          <day>08</day>
          <conf-name>35th Annual ACM/IEEE Symposium on Logic in Computer Science</conf-name>
          <conf-date>July 8-11, 2020</conf-date>
          <conf-loc>Saarbrücken, Germany</conf-loc>
          <fpage>141</fpage>
          <lpage>154</lpage>
          <pub-id pub-id-type="doi">10.1145/3373718.3394796</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Differential privacy: from theory to practice</article-title>
          <source>Synthesis Lectures on Information Security, Privacy, and Trust</source>
          <year>2016</year>
          <month>10</month>
          <day>25</day>
          <access-date>2021-09-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.morganclaypool.com/doi/10.2200/S00735ED1V01Y201609SPT018">https://www.morganclaypool.com/doi/10.2200/S00735ED1V01Y201609SPT018</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <article-title>Differential privacy</article-title>
          <source>Apple</source>
          <access-date>2021-09-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.apple.com/privacy/docs/Differential_Privacy_Overview.pdf">https://www.apple.com/privacy/docs/Differential_Privacy_Overview.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>Google's differential privacy libraries</article-title>
          <source>GitHub</source>
          <access-date>2021-09-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/google/differential-privacy">https://github.com/google/differential-privacy</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Erlingsson</surname>
              <given-names>Ú</given-names>
            </name>
            <name name-style="western">
              <surname>Pihur</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Korolova</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>RAPPOR: Randomized Aggregatable Privacy-Preserving Ordinal Response</article-title>
          <source>CCS '14: Proceedings of the 2014 ACM SIGSAC Conference on Computer and Communications Security</source>
          <year>2014</year>
          <month>11</month>
          <day>03</day>
          <conf-name>2014 ACM SIGSAC Conference on Computer and Communications Security</conf-name>
          <conf-date>November 3-7, 2014</conf-date>
          <conf-loc>Scottsdale, AZ</conf-loc>
          <fpage>1054</fpage>
          <lpage>1067</lpage>
          <pub-id pub-id-type="doi">10.1145/2660267.2660348</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Niyato</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Local differential privacy-based federated learning for Internet of Things</article-title>
          <source>IEEE Internet Things J</source>
          <year>2021</year>
          <month>6</month>
          <day>1</day>
          <volume>8</volume>
          <issue>11</issue>
          <fpage>8836</fpage>
          <lpage>8853</lpage>
          <pub-id pub-id-type="doi">10.1109/jiot.2020.3037194</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nasr</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shokri</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Houmansadr</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Comprehensive privacy analysis of deep learning: passive and active white-box inference attacks against centralized and federated learning</article-title>
          <year>2019</year>
          <conf-name>2019 IEEE Symposium on Security and Privacy (SP)</conf-name>
          <conf-date>May 19-23, 2019</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <fpage>739</fpage>
          <pub-id pub-id-type="doi">10.1109/sp.2019.00065</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ha</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Truong</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Differential privacy in deep learning: an overview</article-title>
          <year>2019</year>
          <conf-name>2019 International Conference on Advanced Computing and Applications (ACOMP)</conf-name>
          <conf-date>November 26-28, 2019</conf-date>
          <conf-loc>Nha Trang, Vietnam</conf-loc>
          <pub-id pub-id-type="doi">10.1109/acomp.2019.00022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving aggregation of personal health data streams</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <volume>13</volume>
          <issue>11</issue>
          <fpage>e0207639</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tinyurl.com/yufc2xm2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0207639</pub-id>
          <pub-id pub-id-type="medline">30496200</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-26819</pub-id>
          <pub-id pub-id-type="pmcid">PMC6264901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suriyakumar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Papernot</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Goldenberg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Chasing your long tails: differentially private prediction in health care settings</article-title>
          <source>FAccT '21: Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency</source>
          <year>2021</year>
          <month>03</month>
          <day>03</day>
          <conf-name>2021 ACM Conference on Fairness, Accountability, and Transparency</conf-name>
          <conf-date>March 3-10, 2021</conf-date>
          <conf-loc>Virtual event (Canada)</conf-loc>
          <fpage>723</fpage>
          <lpage>734</lpage>
          <pub-id pub-id-type="doi">10.1145/3442188.3445934</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dwork</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rothblum</surname>
              <given-names>GN</given-names>
            </name>
          </person-group>
          <article-title>Concentrated differential privacy</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on March 6, 2016
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1603.01887"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holohan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Antonatos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Braghin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mac Aonghusa</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The bounded Laplace mechanism in differential privacy</article-title>
          <source>ArXiv. Preprint posted online on August 30, 2018</source>
          <year>2020</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1808.10410"/>
          </comment>
          <pub-id pub-id-type="doi">10.29012/jpc.715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AEW</given-names>
            </name>
            <name name-style="western">
              <surname>Raffa</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Badawi</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The eICU Collaborative Research Database, a freely available multi-center database for critical care research</article-title>
          <source>Sci Data</source>
          <year>2018</year>
          <month>09</month>
          <day>11</day>
          <volume>5</volume>
          <fpage>180178</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/sdata.2018.178"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2018.178</pub-id>
          <pub-id pub-id-type="medline">30204154</pub-id>
          <pub-id pub-id-type="pii">sdata2018178</pub-id>
          <pub-id pub-id-type="pmcid">PMC6132188</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>Jack E</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>Andrew A</given-names>
            </name>
            <name name-style="western">
              <surname>McNair</surname>
              <given-names>Douglas S</given-names>
            </name>
            <name name-style="western">
              <surname>Malila</surname>
              <given-names>Fern M</given-names>
            </name>
          </person-group>
          <article-title>Acute Physiology and Chronic Health Evaluation (APACHE) IV: hospital mortality assessment for today's critically ill patients</article-title>
          <source>Crit Care Med</source>
          <year>2006</year>
          <month>05</month>
          <volume>34</volume>
          <issue>5</issue>
          <fpage>1297</fpage>
          <lpage>310</lpage>
          <pub-id pub-id-type="doi">10.1097/01.CCM.0000215112.84523.F0</pub-id>
          <pub-id pub-id-type="medline">16540951</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajendran</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jayabalan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rana</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A study on k-anonymity, l-diversity, and t-closeness techniques of privacy preservation data publishing</article-title>
          <source>Int J Innov Res Sci Eng Technol</source>
          <year>2019</year>
          <volume>6</volume>
          <issue>6</issue>
          <fpage>19</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.ijirst.org/articles/IJIRSTV6I6015.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kohlmayer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Prasser</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>KA</given-names>
            </name>
          </person-group>
          <article-title>The cost of quality: implementing generalization and suppression for anonymizing biomedical data with minimal information loss</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58</volume>
          <fpage>37</fpage>
          <lpage>48</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00200-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.09.007</pub-id>
          <pub-id pub-id-type="medline">26385376</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00200-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dankar</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>El Emam</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Practicing differential privacy in health care: a review</article-title>
          <source>Trans Data Priv</source>
          <year>2013</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>35</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.tdp.cat/issues11/tdp.a129a13.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
