<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v6i2e20</article-id>
    <article-id pub-id-type="pmid">29653917</article-id>
    <article-id pub-id-type="doi">10.2196/medinform.7744</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Privacy-Preserving Patient Similarity Learning in a Federated Environment: Development and Analysis</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Luo</surname>
          <given-names>Yuan</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Noman</surname>
          <given-names>Mohammed</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Marsolo</surname>
          <given-names>Keith</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes">
      <name name-style="western">
        <surname>Lee</surname>
        <given-names>Junghye</given-names>
      </name>
      <degrees>PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>School of Management Engineering</institution>
        <institution>Ulsan National Institute of Science and Technology</institution>
        <addr-line>UNIST Industrial complex campus</addr-line>
        <addr-line>10, Techno Saneop-ro 55beon-gil, Nam-gu</addr-line>
        <addr-line>Ulsan, 44776</addr-line>
        <country>Republic Of Korea</country>
        <phone>82 52 217 3129</phone>
        <email>jul289@ucsd.edu</email>
      </address>  
      <xref rid="aff2" ref-type="aff">2</xref>
      <xref rid="aff3" ref-type="aff">3</xref>
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9736-4796</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Sun</surname>
          <given-names>Jimeng</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1512-6426</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Wang</surname>
          <given-names>Fei</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7594-663X</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Wang</surname>
          <given-names>Shuang</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6420-983X</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Jun</surname>
          <given-names>Chi-Hyuck</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-0911-7347</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib6">
        <name name-style="western">
          <surname>Jiang</surname>
          <given-names>Xiaoqian</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9933-2205</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>School of Management Engineering</institution>
    <institution>Ulsan National Institute of Science and Technology</institution>  
    <addr-line>Ulsan</addr-line>
    <country>Republic Of Korea</country></aff>
    <aff id="aff2">
    <sup>2</sup>
    <institution>Department of Biomedical Informatics</institution>
    <institution>University of California San Diego</institution>  
    <addr-line>San Diego, CA</addr-line>
    <country>United States</country></aff>
    <aff id="aff3">
    <sup>3</sup>
    <institution>Department of Industrial and Management Engineering</institution>
    <institution>Pohang University of Science and Technology</institution>  
    <addr-line>Pohang</addr-line>
    <country>Republic Of Korea</country></aff>
    <aff id="aff4">
    <sup>4</sup>
    <institution>College of Computing</institution>
    <institution>Georgia Institute of Technology</institution>  
    <addr-line>Atlanta, GA</addr-line>
    <country>United States</country></aff>
    <aff id="aff5">
    <sup>5</sup>
    <institution>Division of Health Informatics, Department of Healthcare Policy and Research</institution>
    <institution>Weill Cornell Medical College</institution>  
    <institution>Cornell University</institution>  
    <addr-line>New York City, NY</addr-line>
    <country>United States</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Junghye Lee 
      <email>jul289@ucsd.edu</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><season>Apr-Jun</season><year>2018</year></pub-date>
    <pub-date pub-type="epub">
      <day>13</day>
      <month>04</month>
      <year>2018</year>
    </pub-date>
    <volume>6</volume>
    <issue>2</issue>
    <elocation-id>e20</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>29</day>
        <month>3</month>
        <year>2017</year>
      </date>
      <date date-type="rev-request">
        <day>22</day>
        <month>7</month>
        <year>2017</year>
      </date>
      <date date-type="rev-recd">
        <day>12</day>
        <month>9</month>
        <year>2017</year>
      </date>
      <date date-type="accepted">
        <day>6</day>
        <month>1</month>
        <year>2018</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Junghye Lee, Jimeng Sun, Fei Wang, Shuang Wang, Chi-Hyuck Jun, Xiaoqian Jiang. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 13.04.2018.</copyright-statement>
    <copyright-year>2018</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://medinform.jmir.org/2018/2/e20/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>There is an urgent need for the development of global analytic frameworks that can perform analyses in a privacy-preserving federated environment across multiple institutions without privacy leakage. A few studies on the topic of federated medical analysis have been conducted recently with the focus on several algorithms. However, none of them have solved similar patient matching, which is useful for applications such as cohort construction for cross-institution observational studies, disease surveillance, and clinical trials recruitment.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>The aim of this study was to present a privacy-preserving platform in a federated setting for patient similarity learning across institutions. Without sharing patient-level information, our model can find similar patients from one hospital to another.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We proposed a federated patient hashing framework and developed a novel algorithm to learn context-specific hash codes to represent patients across institutions. The similarities between patients can be efficiently computed using the resulting hash codes of corresponding patients. To avoid security attacks based on reverse engineering of the model, we applied homomorphic encryption to patient similarity search in a federated setting.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>We used sequential medical events extracted from the Multiparameter Intelligent Monitoring in Intensive Care-III database to evaluate the proposed algorithm in predicting the incidence of five diseases independently. Our algorithm achieved average areas under the curve of 0.9154 and 0.8012 with balanced and imbalanced data, respectively, in <italic>κ</italic>-nearest neighbor classification with <italic>κ</italic>=3. We also confirmed privacy preservation in similarity search by using homomorphic encryption.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>The proposed algorithm can help search similar patients across institutions effectively to support federated data analysis in a privacy-preserving manner.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>privacy</kwd>
      <kwd>federated environment</kwd>
      <kwd>similarity learning</kwd>
      <kwd>hashing</kwd>
      <kwd>homomorphic encryption</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Data-Driven Decision Making in Medical Fields</title>
        <p>Electronic health records (EHRs) are becoming ubiquitous across almost all medical institutions. They provide insight into diagnoses [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref6">6</xref>], as well as prognoses [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref10">10</xref>] and can assist in the development of cost-effective treatment and management programs [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. All kinds of data across institutions are being collected in EHRs, including diagnosis, medication, lab results, procedures, and clinical notes. In the recently announced precision medicine initiative, many more other types of data including omics data such as genomic and proteomic data and behavior data such as activity sensor data are being generated and collected by doctors and patients. As such rich and heterogeneous health data become available, the entire medical research and practice are shifting from the knowledge or guideline-driven approaches to the data or evidence-driven paradigm, where effective and efficient algorithms become the key for clinical research and practice.</p>
      </sec>
      <sec>
        <title>Limitations of Single-Institutional Studies</title>
        <p>Previously, many biomedical studies were conducted within a single institution having limited EHR data because of the lack of federated data analysis framework and the institutional privacy concerns on data sharing. However, such an approach has many limitations. For example, it has been demonstrated that genome-wide association studies on EHR data often failed to discover known biomarkers from a single institution because of limited sample size [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. To enable cross-institutional studies, many collaborative networks have been proposed, such as mini-sentinel [<xref ref-type="bibr" rid="ref17">17</xref>], Observational Health Data Sciences and Informatics [<xref ref-type="bibr" rid="ref18">18</xref>], National Patient-Centered Clinical Research Network [<xref ref-type="bibr" rid="ref19">19</xref>], and i2b2 Shared Health Research Informatics Network [<xref ref-type="bibr" rid="ref20">20</xref>]. These frameworks enable certain analyses (such as database queries with very specific inclusion or exclusion criteria) to be conducted efficiently in a federated manner. However, more sophisticated analyses such as predictive models [<xref ref-type="bibr" rid="ref21">21</xref>] and context-specific patient similarity search [<xref ref-type="bibr" rid="ref22">22</xref>] are still a challenge for most existing frameworks, as cross-institutional EHR data exchange is required to build such models, which is usually infeasible because of the institutional privacy and security concerns. There is an urgent need for the development of novel frameworks that can perform analyses in a privacy-preserving federated environment across multiple institutions. In this way, global analytic models can be built collectively without sharing raw EHR data. 
A few studies on the topic of federated clinical analysis [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref26">26</xref>] have been conducted recently with the focus on different algorithms. However, none of them have solved the problem of similar patient matching, which is important for many biomedical studies. Therefore, we plan to develop a privacy-preserving analytic platform that focuses on a suite of algorithmic challenges on patient similarity learning.</p>
      </sec>
      <sec>
        <title>Patient Similarity Learning</title>
        <p>Patient similarity learning aims to develop computational algorithms for defining and locating clinically similar patients to a query patient under a specific clinical context [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref30">30</xref>]. The patient similarity search is very challenging because the raw EHR data is sparse, high-dimensional, and noisy, which makes finding an exact match among patients using EHR data almost impossible. Besides, patient similarity learning is often context-specific. For example, patient similarity measure for heart disease management can be very different from cancer management. The fundamental challenge is how we can perform effective context-specific patient similarity learning in a federated setting, which enables many different applications:</p>
        <list list-type="bullet">
          <list-item>
            <p>Cohort construction: cross-institution observational studies are challenging but necessary as many studies require a large and specific patient cohort that does not exist within a single institution. To conduct such a study, an efficient similarity search needs to be conducted across institutions to identify the focused patient cohort.</p>
          </list-item>
          <list-item>
            <p>Disease surveillance: The Centers for Disease Control and Prevention monitors thousands of hospitals for potential epidemics. When a suspicious case is reported, there is a need to find similar cases across geographies.</p>
          </list-item>
          <list-item>
            <p>Clinical trial recruitment: pharmaceutical companies often need to spend a significant amount of time and resources to identify targeted patients through many different clinical institutions. Ideally, they would like to be able to perform patient similarity search across all clinical institutions to identify where those relevant patients are. Then they can quickly focus on recruiting patients from the right clinical institutions.</p>
          </list-item>
        </list>
        <p>Patient similarity learning involves two computational phases: (1) patient representation learning is to learn the context-specific representation of patients based on their EHR data. For example, patients may be given different representations in heart disease management versus cancer management and (2) patient similarity search is to find similar patients based on their corresponding representations. In a federated environment where multiple institutions exist, patient similarity learning has many unique challenges: (1) how to design an efficient but flexible patient representation that enables fast similarity search? (2) how to learn patient representation from heterogeneous data sources? and (3) how to preserve privacy while still allowing the computation of the patient representation and the search of similar patients across institutions?</p>
      </sec>
      <sec>
        <title>Research Objective</title>
        <p>The main objective of this paper was to develop a privacy-preserving analytic platform for patient similarity learning in a distributed manner. We propose to learn context-specific binary hash codes to represent patients across institutions. The similarities between patients can be efficiently computed as the Hamming distance using the resulting hash codes of corresponding patients; the Hamming distance is defined to be the number of places where two binary codes differ. As patient data are heterogeneous and come from multiple sources such as diagnosis, medication, and lab results, we propose a multi-hash approach that learns a hash function for each data source. Then, the patient similarity is calculated from the hash codes of all data sources. To avoid the potential security risk posed by attacks from malicious users, we also adopt homomorphic encryption [<xref ref-type="bibr" rid="ref31">31</xref>] to support secure patient similarity search in a federated setting. Finally, the proposed algorithm is applied and validated on real data.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Feature Construction</title>
        <p>For <italic>K</italic> feature domains, we assume a vector-based representation for patients in every feature domain <italic>k</italic> (1≤<italic>k</italic>≤<italic>K</italic>). There are different ways to construct the feature vectors: (1) for nominal features with standard dictionaries, such as diagnosis and procedure codes, we can use either a binary value for presence or the code frequency within the observation period (from which the features are extracted); (2) for continuous features such as age or lab test values, we can use them as they are or we can first quantize them and treat each quantized region as a nominal feature. For example, the values of a specific lab test can be quantized as critical low, low, normal, high, and critical high; and (3) for time-evolving features, if we want to consider the temporal trends in the feature construction process, we can first construct a temporal pattern dictionary with either a data-driven method or expert knowledge, and then treat each pattern as a nominal feature. For example, if there are four types of features including two demographics, 20 prescriptions, 15 lab tests, and 10 diagnoses, we can construct a vector-based representation for patient A as shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>. We represent gender as a binary value and age as it is. For diagnosis, prescription, and lab test, we add a one-hot representation of each event (ie, {0,1}<sup>&#124;<italic>C</italic>&#124;</sup>, where &#124;<italic>C</italic>&#124; is the number of codes).</p>
      </sec>
      <sec>
      <title>Hashing</title>
      <p>In general, hashing is an approach of transforming the data item to a low-dimensional representation, or equivalently a short code consisting of a sequence of bits (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p>
      <p>Hashing technologies can be applied in many applications such as Bloom filter [<xref ref-type="bibr" rid="ref32">32</xref>] and cryptography [<xref ref-type="bibr" rid="ref33">33</xref>]. Similarity-based hashing [<xref ref-type="bibr" rid="ref34">34</xref>] is one specific type of hashing that aims to preserve the data similarities in their original space with hash codes. On the basis of the availability of supervision information, a similarity-based hashing method can be categorized as unsupervised [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref40">40</xref>], semi-supervised [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref43">43</xref>], or supervised hashing [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. Unsupervised methods learn hash functions purely based on data distributions. Supervised methods exploit the labeled pairwise relationship between entities to capture the high-level data semantics. Semi-supervised methods lie in between them, that is, they explore both data distribution characteristics and labeled pairwise data relationships to learn the hash functions. Most of these existing methods assume a single vector-based representation for every data object.</p>  
      <p>However, one challenge in our scenario is that the patient features are highly heterogeneous, that is, the features for characterizing the patients are of different types. In this case, it may not be effective to represent each patient as a single vector (simple concatenation will not work as different features are of different types and have different value ranges). There are some existing multi-modal hashing methods [<xref ref-type="bibr" rid="ref46">46</xref>-<xref ref-type="bibr" rid="ref52">52</xref>] that aim to derive a unified single-hash table for encoding the data objects with heterogeneous features. The problem with a single-hash (or uni-hash) table is that it is difficult to discover the latent similarity components [<xref ref-type="bibr" rid="ref53">53</xref>] derived from different feature types, which is crucial in our scenario. For example, it is important to know not only how similar two patients are, but also why (eg, patients A and B are similar to each other mainly because of their similar demographics and patients B and C are similar because of their similar diagnosis history and lab test values).</p>
      
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Example of feature construction. Prescription, lab test, and diagnosis are denoted by p, l, and d, respectively.</p>
        </caption>
        <graphic xlink:href="medinform_v6i2e20_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Example of hashing.</p>
          </caption>
          <graphic xlink:href="medinform_v6i2e20_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        
      </sec>
      <sec>
        <title>Federated Patient Hashing Framework</title>
        <p>Symbols used in this paper are listed in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
                <p><xref ref-type="fig" rid="figure3">Figure 3</xref> illustrates the overall federated patient matching framework. Suppose there are <italic>M</italic> sites, where the <italic>i</italic>-th site <italic>S</italic><sup>i</sup> owns a patient population <italic>P</italic><sup>i</sup>. We use <italic>p</italic><sup>i</sup><sub>j</sub> to represent the <italic>j</italic>-th patient in <italic>P</italic><sup>i</sup>. Then, our problem is, given a query patient, how to retrieve similar patients from those <italic>M</italic> sites without explicitly accessing the patient feature vectors. Our plan is to resolve this problem using similarity-based hashing, which transforms the patient’s raw features into a binary vector (a signature) representing patient characteristics (patient representation learning). The pairwise patient similarities will be evaluated as the pairwise distance based on those signatures (patient similarity search). In this paper, we will focus on feature-based hashing, that is, those binary patient signatures are obtained by proper transformation from patient features. Therefore, to perform hashing, we need to first construct a feature-based representation for patients.</p>
        <p>Without loss of generality, we assume there are <italic>K</italic> different feature types to characterize every <italic>p</italic><sup>i</sup><sub>j</sub>, and we use <italic>p</italic><sup>i</sup><sub>jk</sub> (<italic>k</italic>=1,2,…, <italic>K</italic>) to represent the <italic>k</italic>-th type of feature vector of <italic>p</italic><sup>i</sup><sub>j</sub>. The goal is to derive an effective computational framework for patient matching in a federated environment, and the key idea is to learn a good hash function that can transform the patient features into binary hash codes. A uni-hash table approach shown in <xref ref-type="fig" rid="figure2">Figures 2</xref> and <xref ref-type="fig" rid="figure3">3</xref> is to learn only one hash function <italic>f</italic>: <italic>R</italic><sup>d</sup>→{-1,+1}<sup>b</sup> for the whole feature vector, where <italic>d</italic> is the dimensionality of the whole feature vector, and <italic>b</italic> is the number of bits of the hash codes learned by <italic>f</italic>. In this paper, we propose a multi-hash approach for patient hashing that aims to learn a hash function <italic>f</italic><sub>k</sub>: <italic>R</italic><sup>d</sup><sub>k</sub>→{-1,+1}<sup>b</sup><sub>k</sub> for every patient feature type <italic>k</italic> (<italic>k</italic>=1,2,…, <italic>K</italic>); <italic>d</italic><sub>k</sub> is the dimensionality of the <italic>k</italic>-th feature type, and <italic>b</italic><sub>k</sub> is the number of bits of the learned hash codes for the <italic>k</italic>-th feature type. Each <italic>f</italic><sub>k</sub> (<italic>k</italic>=1,2,…, <italic>K</italic>) is shared across all the <italic>M</italic> sites.
We use the sign function to construct the hash codes, that is, sign(<italic>Q</italic><sup>i</sup><sub>k</sub>)∈{-1,+1}<sup>b</sup><sub>k</sub><sup>×N</sup><sub>i</sub>, where <italic>Q</italic><sup>i</sup><sub>k</sub> is the numerical data obtained by transforming the original data of the <italic>i</italic>-th site for the <italic>k</italic>-th feature type, <italic>P</italic><sup>i</sup><sub>k</sub>∈<italic>R</italic><sup>d</sup><sub>k</sub><sup>×N</sup><sub>i</sub>, with a hash function <italic>f</italic><sub>k</sub> that incorporates the function coefficients for the <italic>k</italic>-th feature type, <italic>W</italic><sub>k</sub>∈<italic>R</italic><sup>d</sup><sub>k</sub><sup>×b</sup><sub>k</sub>; <italic>N</italic><sub>i</sub> is the population size of the <italic>i</italic>-th site. How these components are formulated is described in detail below. We use <italic>H</italic><sub>k</sub><sup>i</sup>=sign(<italic>Q</italic><sup>i</sup><sub>k</sub>) to denote the hash codes of the <italic>k</italic>-th feature type for the patients at the <italic>i</italic>-th site. <xref ref-type="fig" rid="figure4">Figure 4</xref> shows the process of patient similarity calculation with a multi-hash approach.</p>
        <p>The <italic>u</italic>-th column of <italic>H</italic><sub>k</sub><sup>i</sup>, <italic>h</italic><sup>i</sup><sub>uk</sub>∈{-1,+1}<sup>b</sup><sub>k</sub>, is the hash code of <italic>p</italic><sup>i</sup><sub>uk</sub>. Then, the similarity between <italic>p</italic><sup>i</sup><sub>uk</sub> and <italic>p</italic><sup>i</sup><sub>vk</sub> can be evaluated as the normalized inner product of <italic>h</italic><sup>i</sup><sub>uk</sub> and <italic>h</italic><sup>i</sup><sub>vk</sub> as shown in equation 1:</p>
        <disp-formula>
        <bold>(1)	</bold>
        <italic>s</italic>
        <sup>i</sup>
        <sub>kuv</sub> = 1/ 
        <italic>b</italic>
        <sub>k</sub>(<italic>h</italic><sup>i</sup>
        <sub>uk</sub>) 
        <sup>T</sup>(<italic>h</italic>
        <sup>i</sup>
        <sub>vk</sub>)</disp-formula>
        <p>Thus, the overall similarity can be computed as the average of the <italic>K</italic> per-feature-type similarities, as shown in equation 2, which is bounded on the interval [−1,1].</p>
        <disp-formula>
        <bold>(2)	</bold>
        <italic>s</italic>
        <sup>i</sup>
        <sub>uv</sub> = 1/ 
        <italic>K</italic>∑<sub>k</sub> 1/ 
        <italic>b</italic>
        <sub>k</sub>(<italic>h</italic>
        <sup>i</sup>
        <sub>uk</sub>) 
        <sup>T</sup>(<italic>h</italic>
        <sup>i</sup>
        <sub>vk</sub>)</disp-formula>
        <p>Here, we suggest a general framework for learning {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>, which is the most important component. The framework basically constructs an objective function in terms of {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>, such as the one shown in equation 3, where <italic>λ</italic><sub>S</sub>, <italic>λ</italic><sub>U</sub>, and <italic>λ</italic><sub>W</sub> are the regularization weights of <italic>S</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>), <italic>U</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>), and Ω({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>), respectively, and then minimizes (or maximizes) it:</p>
        <disp-formula>
        <bold>(3)	</bold>
        <italic>J</italic> ({<italic>W</italic>
        <sub>k</sub>}<sup>K</sup>
        <sub>k=1</sub>) = 
        <italic>Ψ</italic> ({<italic>W</italic>
        <sub>k</sub>}<sup>K</sup>
        <sub>k=1</sub>) + 
        <italic>λ</italic>
        <sub>S</sub>
        <italic>S</italic> ({<italic>W</italic>
        <sub>k</sub>}<sup>K</sup>
        <sub>k = 1</sub>) + 
        <italic>λ</italic>
        <sub>U</sub>
        <italic>U</italic> ({<italic>W</italic>
        <sub>k</sub>}<sup>K</sup>
        <sub>k = 1</sub>) + 
        <italic>λ</italic>
        <sub>W</sub>Ω ({<italic>W</italic>
        <sub>k</sub>}<sup>K</sup>
        <sub>k = 1</sub>)</disp-formula>
        <p><italic>Ψ</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>) is a reconfiguration error term between the low-dimensional representation of the original data and the hash codes; it is the main term of the objective function and generates the hash codes from the original data, as shown in equation 4, where &#124;&#124;·&#124;&#124;<sub>F</sub> is the Frobenius norm [<xref ref-type="bibr" rid="ref54">54</xref>]. On the basis of this term, the hash function in our framework is formed as <italic>f</italic><sub>k</sub>(<italic>P</italic><sub>k</sub><sup>i</sup>)=<italic>W</italic><sub>k</sub><sup>T</sup><italic>P</italic><sub>k</sub><sup>i</sup>=<italic>Q</italic><sup>i</sup><sub>k</sub>, and this transformation results in <italic>H</italic><sub>k</sub><sup>i</sup>=sign(<italic>Q</italic><sup>i</sup><sub>k</sub>).</p>
        <disp-formula>
        <bold>(4)	</bold>
        <italic>Ψ</italic> ({<italic>W</italic>
        <sub>k</sub>}<sup>K</sup>
        <sub>k=1</sub>) = ∑<sub>i</sub>∑<sub>k</sub>&#124;&#124;<italic>W</italic><sub>k</sub>
        <sup>T</sup>
        <italic>P</italic>
        <sub>k</sub>
        <sup>i</sup> - 
        <italic>H</italic>
        <sub>k</sub>
        <sup>i</sup>&#124;&#124;<sup>2</sup>
        <sub>F</sub></disp-formula>
                <p>The objective function can incorporate regularizers, as well as the main term, to obtain better solutions of {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> by (1) introducing additional information to improve either unsupervised or supervised learning if desired, (2) solving an ill-posed problem, and (3) preventing overfitting. Possible regularizers are listed as follows:</p>
        <p><italic>S</italic>({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>) is a supervised loss term that measures the quantization loss during the hashing process when supervision information is available for the patients. Here, the supervision information could be the labels of the patients, such as the disease the patients have. For example, if both <italic>p</italic><sup>i</sup><sub>u</sub> and <italic>p</italic><sup>i</sup><sub>v</sub> have the same disease, then their relationship <italic>r</italic><sup>i</sup><sub>uv</sub>=1, otherwise <italic>r</italic><sup>i</sup><sub>uv</sub>=-1. Then, we can set <italic>S</italic>({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>) as shown in equation 5:</p>
        <disp-formula>
        <bold>(5)	</bold>
        <italic>S</italic> ({<italic>W</italic>
        <sub>k</sub>}<sup>K</sup>
        <sub>k=1</sub>) = ∑<sub>i</sub>∑<sub>k</sub>∑<sub>u,v</sub> - 
        <italic>s</italic>
        <sup>i</sup>
        <sub>kuv</sub>
        <italic>r</italic>
        <sup>i</sup>
        <sub>uv</sub></disp-formula>
        
        <boxed-text id="box1" position="float">
          <title>List of symbols.</title>
          <p><italic>M</italic>: the number of local sites</p>
          <p><italic>K</italic>: the number of feature types (domains)</p>
          <p><italic>S</italic><sup>i</sup>: <italic>i</italic>-th local site</p>
          <p><italic>P</italic><sup>i</sup>: patient population in <italic>S</italic><sup>i</sup></p>
          <p><italic>N</italic><sup>i</sup>: patient population size of <italic>S</italic><sup>i</sup></p>
          <p><italic>P</italic><sup>i</sup><sub>k</sub>: patient population for <italic>k</italic>-th type of feature in <italic>S</italic><sup>i</sup></p>
          <p><italic>p</italic><sup>i</sup><sub>j</sub>: <italic>j</italic>-th column of <italic>P</italic><sup>i</sup>, <italic>j</italic>-th patient in <italic>P</italic><sup>i</sup></p>
          <p><italic>p</italic><sup>i</sup><sub>jk</sub>: <italic>j</italic>-th column of <italic>P</italic><sup>i</sup><sub>k</sub>, <italic>k</italic>-th type of feature vector for  <italic>p</italic><sup>i</sup><sub>j</sub></p>
          <p><italic>f</italic><sub>k</sub>: <italic>k</italic>-th hash function</p>
          <p><italic>d</italic><sub>k</sub>: dimensionality of the <italic>k</italic>-th feature type</p>
          <p><italic>b</italic><sub>k</sub>: the number of bits of the learned hash codes for the <italic>k</italic>-th feature type</p>
          <p><italic>W</italic><sub>k</sub>: function coefficients of the hash function for the <italic>k</italic>-th feature type</p>
          <p><italic>w</italic><sub>ik</sub>: <italic>i</italic>-th column of <italic>W</italic><sub>k</sub></p>
          <p><italic>Q</italic><sup>i</sup><sub>k</sub>: numerical data transformed from <italic>P</italic><sup>i</sup><sub>k</sub></p>
          <p>sign(<italic>Q</italic><sup>i</sup><sub>k</sub>): signed <italic>Q</italic><sup>i</sup><sub>k</sub></p>
          <p><italic>H</italic><sup>i</sup><sub>k</sub>: hash codes for <italic>P</italic><sup>i</sup><sub>k</sub>(=sign(<italic>Q</italic><sup>i</sup><sub>k</sub>))</p>
          <p><italic>h</italic><sup>i</sup><sub>jk</sub>: <italic>j</italic>-th column of <italic>H</italic><sup>i</sup><sub>k</sub>, the hash codes of <italic>p</italic><sup>i</sup><sub>jk</sub></p>
          <p>Ψ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>): reconfiguration error term for {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub></p>
          <p><italic>S</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>): supervised loss term for {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub></p>
          <p><italic>U</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>): unsupervised loss term for {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub></p>
          <p><italic>Ω</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>): term related to {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> itself</p>
          <p><italic>L</italic> (<italic>x</italic>, <italic>y</italic>): loss function between <italic>x</italic> and <italic>y</italic></p>
          <p><italic>λ</italic><sub>S</sub>: regularizer of <italic>S</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>)</p>
          <p><italic>λ</italic><sub>U</sub>: regularizer of <italic>U</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>)</p>
          <p><italic>λ</italic><sub>W</sub>: regularizer of <italic>Ω</italic> ({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>)</p>
          <p><italic>λ</italic>: regularizer of a supervised loss term</p>
          <p><italic>η</italic>: regularizer of a Frobenius norm for <italic>Q</italic></p>
          <p><italic>σ</italic><sup>i</sup><sub>kuv</sub>: similarity between <italic>p</italic><sup>i</sup><sub>uk</sub> and <italic>p</italic><sup>i</sup><sub>vk</sub></p>
          <p><italic>R</italic><sup>i</sup>: pairwise relationship of <italic>P</italic><sup>i</sup> for labeled information</p>
          <p><italic>S</italic><sup>i</sup><sub>k</sub>: pairwise similarity of <italic>P</italic><sup>i</sup><sub>k</sub></p>
          <p><italic>r</italic><sup>i</sup><sub>uv</sub>: relationship between <italic>p</italic><sup>i</sup><sub>uk</sub> and <italic>p</italic><sup>i</sup><sub>vk</sub> for labeled information</p>
          <p><italic>s</italic><sup>i</sup><sub>kuv</sub>: similarity between <italic>p</italic><sup>i</sup><sub>uk</sub> and <italic>p</italic><sup>i</sup><sub>vk</sub></p>
          <p><italic>S</italic><sub>L</sub><italic>(Q</italic><sup>i</sup><sub>k</sub><italic>)</italic>: approximated sign function for <italic>Q</italic><sup>i</sup><sub>k</sub></p>
              </boxed-text>

        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>The whole process of patient matching in a federated environment. The user sends a patient matching request to the service center, which is delegated to patient data resources from several clinical sites. Due to the privacy concerns, the center does not have access to the raw patient data. All patients within different sites need to be first hashed, and the center only has the patient’s signatures after hashing. The hash functions are shared across different sites.</p>
          </caption>
          <graphic xlink:href="medinform_v6i2e20_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>

        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>The process of calculating patient similarity with a multi-hash approach.</p>
          </caption>
          <graphic xlink:href="medinform_v6i2e20_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>

        <p>The possible choices of supervised loss term could be any loss function <italic>L</italic> (<italic>x,y</italic>), and examples include <italic>L(x,y)=-xy</italic> and well-known binary loss functions such as (1) logistic loss, <italic>L</italic> (<italic>x,y</italic>)=log(1+exp(- <italic>xy</italic>)) and (2) hinge loss, <italic>L</italic> (<italic>x,y</italic>)=max(0,1-<italic>xy</italic>).</p>
        <p>Note that <italic>U</italic>({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>) is an unsupervised term that exploits the intrinsic data distribution and enforces the resultant hash codes to comply with the distribution. For example, we can request similar patients to have similar hash codes on each feature type. This can be achieved by minimizing the below regularizer, as shown in equation 6, where <italic>σ</italic><sup>i</sup><sub>kuv</sub> is a similarity between <italic>p</italic><sup>i</sup><sub>uk</sub> and <italic>p</italic><sup>i</sup><sub>vk</sub> based on, for example, a Gaussian function for continuous valued features or a cosine function after Term Frequency-Inverse Document Frequency normalization on bag-of-code (eg, diagnosis code or procedure code):</p>
        <disp-formula>
        <bold>(6)	</bold>
        <italic>U</italic> ({<italic>W</italic>
        <sub>k</sub>}<sup>K</sup>
        <sub>k=1</sub>) = ∑<sub>i</sub>∑<sub>k</sub>∑<sub>u,v</sub>
        <italic>σ</italic>
        <sup>i</sup>
        <sub>kuv</sub>&#124;&#124;<italic>h</italic>
        <sup>i</sup>
        <sub>uk</sub>- 
        <italic>h</italic>
        <sup>i</sup>
        <sub>vk</sub>&#124;&#124;<sup>2</sup>
        <sub>F</sub></disp-formula>
        <p>Ω({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>) is a term related to {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> themselves, which is independent of the patient features. Examples of Ω({<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>) include (1) Frobenius norm regularizer ∑<sup>K</sup><sub>k=1</sub>&#124;&#124;<italic>W</italic><sub>k</sub>&#124;&#124;<sup>2</sup><sub>F</sub>, which can be used for improving the numerical stability of the solution process and (2) orthogonality regularizer ∑<sup>K</sup><sub>k=1</sub>∑<sub>i≠j</sub>&#124;&#124;<italic>w</italic><sup>T</sup><sub>ik</sub><italic>w</italic><sub>jk</sub>&#124;&#124;<sup>2</sup>, where <italic>w</italic><sub>ik</sub> is the <italic>i</italic>-th column of <italic>W</italic><sub>k</sub>, which can encourage the diversity of the learned hash codes and thus improve their representation effectiveness.</p>
        <p><xref ref-type="fig" rid="figure5">Figure 5</xref> shows a running example of the proposed hashing methodology. Such optimization problems can be solved with Block Coordinate Descent technologies [<xref ref-type="bibr" rid="ref55">55</xref>], with {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> as variable blocks that alternatively update <italic>W</italic><sub>k</sub> (1≤<italic>k</italic>≤<italic>K</italic>) one by one. Moreover, as different sites are continuously receiving new patients (or new patient features), we will need to continuously update the hash functions as well. Fortunately, as can be observed from equations 4, 5, and 6, those terms are fully decomposable with respect to different sites. Therefore, we can update the hash functions in an asynchronous manner, that is, we can update the current {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> as soon as new patient data is received on site <italic>i</italic>.</p>
      </sec>
      <sec>
      <title>Privacy-Preserving Patient Representation Learning in a Federated Setting</title>
      <p>Without loss of generality, let us instantiate the objective function with the regularizer <italic>λ</italic> of empirical error on the labeled data for a family of hash codes; this choice might be the most basic approach to similar patient learning based on the fact that supervised learning is more commonly used than unsupervised learning because data generated in the medical field usually have label information.</p>  
      <fig id="figure5" position="float">
        <label>Figure 5</label>
        <caption>
          <p>Example of transformation of patient vectors into hash codes and computation of similarity between hash codes.</p>
        </caption>
        <graphic xlink:href="medinform_v6i2e20_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>  
      <p>When solving the initiated objective function, two possible problems because of the sign function for <italic>Q</italic> arise. First, <italic>Q</italic> may not be a unique solution, and thus, the objective function is difficult to converge without considering any regularizer about <italic>Q</italic>. We add a Frobenius norm regularizer <italic>η</italic> to solve this problem. In addition, the objective function <italic>f(W,Q)</italic> is nondifferentiable in terms of <italic>Q</italic>. We can approximate the sign function with the surrogate function. Then, we have the final objective function, as shown in equation 7, where <italic>R</italic><sup>i</sup>∈ <italic>R</italic><sup>N</sup><sub>i</sub><sup>ⅹN</sup><sub>i</sub> is the pairwise relationship of <italic>P</italic><sup>i</sup> for labeled information:</p>
      <disp-formula>
      <bold>(7)	</bold>
      <italic>f</italic> (<italic>W</italic>, 
      <italic>Q</italic>) = min∑<sub>i</sub>∑<sub>k</sub>&#124;&#124;<italic>W</italic>
      <sub>k</sub>
      <sup>T</sup>
      <italic>P</italic>
      <sup>i</sup>
      <sub>k</sub>
      <italic>-S</italic>
      <sub>L</sub>( 
      <italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub>)&#124;&#124;<sup>2</sup>
      <sub>F</sub> + 
      <italic>λ</italic> ∑<sub>i</sub>∑<sub>k</sub> tr(<italic>-S</italic>
      <sub>L</sub>( 
      <italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub>) 
      <italic>R</italic>
      <sup>i</sup>
      <italic>S</italic>
      <sub>L</sub>( 
      <italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub>)<sup>T</sup>
      <italic>) + η</italic> ∑ 
      <sub>i</sub>∑ 
      <sub>k</sub>&#124;&#124; 
      <italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub>&#124;&#124;<sup>2</sup>
      <sub>F</sub></disp-formula>
      <p>If both <italic>p</italic><sup>i</sup><sub>u</sub> and <italic>p</italic><sup>i</sup><sub>v</sub> have the same disease, then their relationship <italic>r</italic><sup>i</sup><sub>uv</sub>=1, otherwise <italic>r</italic><sup>i</sup><sub>uv</sub>=-1, and <italic>S</italic><sub>L</sub>(·) is the surrogate function, as shown in equation 8, where ∘ is the hadamard (elementwise) product:</p>
      <disp-formula>
      <bold>(8)	</bold>
      <italic>S</italic>
      <sub>L</sub> (<italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub>) = (<italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub>∘ 
      <italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub>
      <italic> + ξ</italic>) 
      <sup>-1/2</sup>∘<italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub></disp-formula>
      <p>The detailed process to derive the final objective function is given in <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref> (Note ⊗ is the Kronecker product [<xref ref-type="bibr" rid="ref56">56</xref>]). The objective function for {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> and {<italic>Q</italic><sup>i</sup><sub>k</sub>}<sup>K,M</sup><sub>k,i=1</sub> can be solved one by one iteratively as variable blocks [<xref ref-type="bibr" rid="ref55">55</xref>] by using the Newton-Raphson method [<xref ref-type="bibr" rid="ref57">57</xref>] until the estimates converge. To be specific, this approach first allows us to update <italic>W</italic><sub>k</sub> for each of <italic>k</italic> (<italic>k</italic>=1,2,…, <italic>K</italic>) with other <italic>W</italic><sub>l</sub> for all <italic>l</italic> (1≤<italic>l≠k</italic>≤<italic>K</italic>) and <italic>Q</italic> being fixed:</p>
      <disp-formula>
      <bold>(9)	</bold>
      <italic>W</italic>
      <sup>new</sup>
      <sub>k</sub> = 
      <italic>W</italic>
      <sub>k</sub> - (∂ 
      <sup>2</sup>
      <italic>f</italic> /∂ 
      <italic>W</italic>
      <sup>2</sup>
      <sub>k</sub>) 
      <sup>-1</sup>∂ 
      <italic>f</italic> /∂ 
      <italic>W</italic>
      <sub>k</sub></disp-formula>
      <p>Then, similarly, we update <italic>Q</italic><sup>i</sup><sub>k</sub> for each combination of (<italic>i</italic>, <italic>k</italic>) (1≤<italic>i</italic>≤<italic>M</italic>,1≤<italic>k</italic>≤<italic>K</italic>) with other combinations of (<italic>j</italic>, <italic>l</italic>) (1≤<italic>j</italic>≠<italic>i</italic>≤<italic>M</italic>,1≤<italic>l</italic>≠<italic>k</italic>≤<italic>K</italic>) and <italic>W</italic> being fixed:</p>  
      <disp-formula>
      <bold>(10)	</bold>
      <italic>Q</italic>
      <sup>i,new</sup>
      <sub>k</sub> = 
      <italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub> - (∂ 
      <sup>2</sup>
      <italic>f</italic> /∂ 
      <italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub>
      <sup>2</sup>) 
      <sup>-1</sup>∂ 
      <italic>f</italic> /∂ 
      <italic>Q</italic>
      <sup>i</sup>
      <sub>k</sub></disp-formula>
      <p>The derivation process for the first and second derivatives of <italic>W</italic> and <italic>Q</italic> is described in <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>. As derivatives are linearly decomposable by sites <italic>i</italic>, the objective function defined in equation 7 can be computed in a distributed manner. This means the optimization only requires locally computed statistics to be delivered to estimate the {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> iteratively until convergence.</p>
      <p>The time complexity at each iteration depends on feature type <italic>k</italic> and site <italic>i</italic>. When updating <italic>W</italic><sub>k</sub> for each of <italic>k</italic> (1≤<italic>k</italic>≤<italic>K</italic>) with other <italic>W</italic><sub>l</sub> for all <italic>l</italic> (1≤<italic>l</italic>≠<italic>k</italic>≤<italic>K</italic>) and <italic>Q</italic> being fixed, the time complexity is <italic>O</italic> (<italic>d</italic><sub>k</sub><sup>3</sup>) because each site has to invert the <italic>d</italic><sub>k</sub>ⅹ <italic>d</italic><sub>k</sub> Hessian matrix. When updating <italic>Q</italic><sup>i</sup><sub>k</sub> for each combination of (<italic>i</italic>, <italic>k</italic>) (1≤<italic>i</italic>≤<italic>M</italic>, 1≤<italic>k</italic>≤<italic>K</italic>) with all other combinations of (<italic>j</italic>, <italic>l</italic>) (1≤<italic>j</italic>≠<italic>i</italic>≤<italic>M</italic>, 1≤<italic>l</italic>≠<italic>k</italic>≤<italic>K</italic>) and <italic>W</italic> being fixed, the time complexity is <italic>O</italic> (<italic>b</italic><sub>k</sub><sup>3</sup><italic>N</italic><sub>i</sub><sup>3</sup>) because <italic>S</italic><sup>i</sup> has to invert the <italic>b</italic><sub>k</sub><italic>N</italic><sub>i</sub>ⅹ<italic>b</italic><sub>k</sub><italic>N</italic><sub>i</sub> Hessian matrix. Therefore, parameters that have a significant effect on time complexity include original and projection dimensions by feature type and population size by site. Other parameters such as the number of sites <italic>M</italic> and the number of feature types <italic>K</italic>, along with the number of iterations, are excluded from the big <italic>O</italic> notation because they are just constants. That is, unless the number of sites or the number of feature types goes to infinity, they have only a small impact on the complexity.</p></sec>
      <sec>
        <title>Privacy-Preserving Patient Similarity Search in a Federated Setting</title>
        <p>To find similar patients across sites, hash codes for each site <italic>H</italic><sup>i</sup> (ie, {<italic>H</italic><sup>i</sup><sub>k</sub>}<sup>K</sup><sub>k=1</sub>) have to be exchanged across institutions originally. However, when all other sites except for the <italic>i</italic>-th site receive <italic>H</italic><sup>i</sup> for similarity search, the patient-level information of <italic>i</italic>-th site can be leaked by equation 4; other sites and a server can be united for reverse engineering to extract <italic>P</italic><sup>i</sup> because they have both {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> and <italic>H</italic><sup>i</sup>, as well as their information in equation 4. <xref ref-type="fig" rid="figure6">Figure 6</xref> illustrates the situation mentioned.</p>
        <p>Therefore, we suggest a way to search for similar patients across different sites that avoids revealing <italic>H</italic><sup>i</sup><sub>k</sub> while still being able to compute similarities based on <italic>H</italic><sup>i</sup><sub>k</sub>. Specifically, we introduce homomorphic encryption, which is a form of encryption where a specific algebraic operation performed on the plaintext is equivalent to another algebraic operation performed on the cipher-text, and an encrypted result, when decrypted, matches the result of the same operation performed on the plaintext. Unlike traditional encryption schemes that do not allow any computations to be performed on the cipher-text without first decrypting it, homomorphic encryption allows computations to be performed without decrypting the data. The results of the computations remain encrypted and can only be read and interpreted by someone with access to the decryption key. Therefore, it is appropriate to use homomorphic encryption in our setting, where other sites and a server may act maliciously. It enables cross-site comparison of health care statistics while protecting the privacy of each site. The procedure of homomorphic encryption in this paper is summarized as follows: first, <italic>i</italic>-th site encrypts hash codes for its query data and delivers encrypted codes to <italic>j</italic>-th site. Next, <italic>j</italic>-th site performs the computation between delivered encrypted codes of <italic>i</italic>-th site and encrypted codes of <italic>j</italic>-th site without a decryption key and sends the computed value to <italic>i</italic>-th site. Finally, <italic>i</italic>-th site decrypts the value to get the hamming distance of hash codes between query data and data of <italic>j</italic>-th site. Each site is restricted to only answer the hamming distance to avoid the risk of privacy leakage. This process is depicted in <xref ref-type="fig" rid="figure7">Figure 7</xref>.</p>
        <p>We note that homomorphic encryption provides an extra layer of privacy protection especially during patient similarity search.</p>
              </sec>
      <sec>
        <title>Security</title>
        <p>There are several participants in our framework.</p>
        <list list-type="bullet">
          <list-item>
            <p>Data custodians (DCs) represent institutions or hospitals who have access to patient data and would like to collaborate in learning about similar patients.</p>
          </list-item>
          <list-item>
            <p>Crypto service provider (CSP) generates public and private keys. The public key is provided to the data custodians to safeguard the intermediary statistics.</p>
          </list-item>
          <list-item>
            <p>Cloud server (CS) computes over summary statistics from individual data custodians to obtain a global patient similarity model.</p>
          </list-item>
        </list>
        <p>Our goal is that a DC does not learn patient-level information from other DCs during the process. We also want to ensure CS cannot infer patient-level information from the data. We assume a CSP is trustworthy and provides encryption keys (public and private). In the threat model, we assume the CS to be semi-honest, that is, it is honest to follow the protocol but curious about patient’s private information while executing the protocol. We make the following basic assumptions: (1) DC and CS do not collude, (2) CS and CSP also do not collude, and (3) DC always receives correct keys from the CSP. To evaluate the security of our system, it is assumed that the security of the system is compromised if patient-level data or intermediary statistics that can infer patient-level data are leaked. CSP is only involved in generating public and private keys and transferring those keys to DCs, and no access to unintended fine-grained local information is involved in this process.</p>
                <p>The leakage is related to computation of {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>, and possible scenarios according to the participants are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>Leakage to CSP in each computation: CSP does not participate in computation at all. Therefore, there is no leakage.</p>
          </list-item>
          <list-item>
            <p>Leakage to DC in each computation: each DC cannot indirectly learn patient data from other DCs only with {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> and its local information {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> and {<italic>Q</italic><sup>i</sup><sub>k</sub>}<sup>K</sup><sub>k=1</sub>. If all DCs except for one collude, it is infeasible for the other DCs to reconstruct <italic>P</italic><sup>i</sup><sub>k</sub> of that one DC because the first and second derivatives of <italic>W</italic><sub>k</sub> have a nonlinear relationship for <italic>P</italic><sup>i</sup><sub>k</sub>. Specifically, it is not possible to specify a certain matrix only given information of covariance matrix because of insufficient equations. They also do not have information (first and second derivatives) about <italic>Q</italic><sup>i</sup><sub>k</sub>.</p>
          </list-item>
          <list-item>
            <p>Leakage to CS in each computation: CS cannot infer patient data from {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>. Even though CS receives local information for the first and second derivatives of {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>, it is infeasible for CS to recover {<italic>P</italic><sup>i</sup><sub>k</sub>}<sup>K</sup><sub>k=1</sub> for the same reason as the collusion among DCs. In finding similar patients, hash codes for each site {<italic>H</italic><sup>i</sup><sub>k</sub>}<sup>K</sup><sub>k=1</sub> have to be exchanged across institutions originally, but the use of homomorphic encryption prevents direct exchange of hash codes {<italic>H</italic><sup>i</sup><sub>k</sub>}<sup>K</sup><sub>k=1</sub> between DCs, and thus, there is no leakage.</p>
          </list-item>
        </list>
        
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Example of potential privacy leakage in patient similarity search across sites.</p>
          </caption>
          <graphic xlink:href="medinform_v6i2e20_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Privacy-preserving patient similarity search by homomorphic encryption; green key: encryption (public) key, blue key: decryption (private) key.</p>
          </caption>
          <graphic xlink:href="medinform_v6i2e20_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>

      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Experimental Setting</title>
        <p>We conducted experiments to validate our proposed method on real data. For comparison, we assumed two different systems against our system according to connection among <italic>M</italic> sites: open and closed system. In the open system, <italic>M</italic> sites can exchange their patient’s information without any restrictions; in the closed system, each site can only utilize patient's information in each site. Ours is in the middle of two systems. For better understanding of these systems, let us assume that there are three sites A, B, and C with the same number of patients <italic>N</italic>. In this situation, an open system means that every site can access the complete information of the entire patient cohort (3ⅹ <italic>N</italic>), including information from other sites as well, and thus, three sites work like one site without any concerns on privacy. On the other hand, closed system indicates that each site can only access its patient-level information (<italic>N</italic>) exclusively. Open system and closed system are derived based on an idealistic situation and a realistic situation, respectively, and our system is in between these two systems, which cannot access patient’s information from other sites but can utilize it through { <italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub>. Then, we predicted the incidence of a certain disease and compared the standard <italic>κ</italic>-nearest neighbor (<italic>κ</italic>-NN) classification results based on hamming distance of multi-hash codes from our system with those based on hamming distance of multi-hash codes from open and closed systems, as well as uni-hash codes from open and closed systems. We also provided baseline results based on four similarity distances of raw data without using hashing for open and closed systems: Euclidean, cityblock, cosine, and correlation. 
We utilized five-fold cross validation (CV) that randomly splits patients into five folds of equal size; we used four folds for training, and one fold for testing. As an evaluation measure, we used the area under the curve (AUC) where the true positive rate (TPR; ie, the number of true positives divided by the sum of true positives and false negatives) is plotted against the false positive rate (ie, the number of false positives divided by the sum of false positives and true negatives) at various thresholds. AUC as a summarized single value for the curve has desirable properties that are independent of the threshold and invariant to a priori class probability distributions. An area of 1 represents a perfect model, and an area of 0.5 represents a worthless model. As we repeated CV ten times, we obtained ten vectors consisting of probabilities based on <italic>κ</italic> nearest neighbors’ voting. The program was implemented in MATLAB 2015b (MathWorks).</p>
      </sec>
      <sec>
        <title>Temporal Sequence Construction</title>
        <p>A sequence is composed of lab tests, prescriptions, diagnoses, conditions, and symptoms that were given to a patient in multiple hospital admissions. We only extracted common lab tests, prescriptions, diagnoses, conditions, and symptoms (prefixed with “ <italic>l</italic>_,” “ <italic>p</italic>_,” “ <italic>d</italic>_,” “ <italic>c</italic>_,” and “ <italic>s</italic>_,” respectively). We used the International Classification of Diseases, 9th revision (ICD-9) level 3 codes instead of level 4 or 5 to avoid extreme sparsity of diagnoses. We assumed space in time between all events to be same. Then, we constructed data for incidence of a target disease as follows: for patients in which a target disease occurs, we sliced the very admission that includes the diagnosis event of a target disease out of the sequence, and used only events before that admission as a feature sequence. For other patients, we used all events. We utilized temporal information of a sequence to make a time-decayed vector representation; when we add a one-hot representation for each event, it is multiplied by the time decaying function (ie, exp(-<italic>γt</italic>) with the decay constant <italic>γ</italic>) that enables to weaken the effect of old event but to strengthen the effect of recent event. A graphical illustration of this sequence and its vector representation is presented in <xref ref-type="fig" rid="figure8">Figure 8</xref>.</p>
      </sec>
      <sec>
      <title>Multiparameter Intelligent Monitoring in Intensive Care-III Database</title>
      <p>We used Multiparameter Intelligent Monitoring in Intensive Care-III (MIMIC-III) database that contains health-related data associated with 46,520 patients and 58,976 admissions to the intensive care unit of Beth Israel Deaconess Medical Center from 2001 to 2012. The database consists of detailed information about patients, including demographics such as gender, age, and race; admissions; lab test results; prescription records; procedures; and discharge ICD diagnoses. On the basis of this database, we randomly selected several common diseases (ie, diseases with relatively large number of positives) as a target disease to verify that our method can perform well in general not only for a specific disease. Then, we extracted temporal sequences and constructed following six feature vectors (<italic>K</italic>=6) for patients in <italic>i</italic>-th site: demographic information <italic>P</italic><sup>i</sup><sub>1</sub>∈<italic>R</italic><sup>d</sup><sub>1</sub><sup>ⅹN</sup><sub>i</sub>, lab results <italic>P</italic><sup>i</sup><sub>2</sub>∈ <italic>R</italic><sup>d</sup><sub>2</sub><sup>ⅹN</sup><sub>i</sub>, diagnoses <italic>P</italic><sup>i</sup><sub>3</sub>∈<italic>R</italic><sup>d</sup><sub>3</sub><sup>ⅹN</sup><sub>i</sub>, prescriptions <italic>P</italic><sup>i</sup><sub>4</sub>∈<italic>R</italic><sup>d</sup><sub>4</sub><sup>ⅹN</sup><sub>i</sub>, conditions <italic>P</italic><sup>i</sup><sub>5</sub>∈<italic>R</italic><sup>d</sup><sub>5</sub><sup>ⅹN</sup><sub>i</sub>, and symptoms <italic>P</italic><sup>i</sup><sub>6</sub>∈ <italic>R</italic><sup>d</sup><sub>6</sub><sup>ⅹN</sup><sub>i</sub>. Time decay constant <italic>γ</italic> was set to 0.01. We note that the feature vector of diagnoses in each dataset does not include its outcome of interest. Information of original datasets is described in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
            <p>To test a three-site scenario, we made datasets balanced and horizontally partitioned the dataset into three, assuming data are evenly partitioned among sites (<italic>M</italic>=3), <italic>P</italic><sup>1</sup><sub>k</sub>∈<italic>R</italic><sup>d</sup><sub>k</sub><sup>ⅹ125</sup>, <italic>P</italic><sup>2</sup><sub>k</sub>∈<italic>R</italic><sup>d</sup><sub>k</sub><sup>ⅹ125</sup>, and <italic>P</italic><sup>3</sup><sub>k</sub>∈<italic>R</italic><sup>d</sup><sub>k</sub><sup>ⅹ125</sup> for every <italic>k</italic>=1,…,6; a federated system is needed when each institution has a limited sample size that is not enough for an analysis. In addition, from the complexity analysis, time to implement the algorithm exponentially increases in proportion to the number of patients. On the basis of these, we randomly selected and placed 125 patients in each site. Then, we predicted the incidence of five diseases independently. We set parameters for regularizers <italic>λ</italic>=0.5 and <italic>η</italic>=10<sup>-3</sup> in common. In addition, for multi-hash approach, we reduced the original dimensions for each feature to ten (ie, <italic>b</italic><sub>k</sub>=10 for <italic>k</italic>=2,…,6) except for the demographic feature that was reduced to two (ie, <italic>b</italic><sub>1</sub>=2), and for uni-hash approach we reduced the total dimensionality to the sum of projection dimensions in multi-hash approach (ie, <italic>b</italic>=52). We note that the results would be robust to the projection dimensionality unless we have too many or too few dimensions. <xref ref-type="table" rid="table2">Table 2</xref> shows the results of <italic>κ</italic>-NN with <italic>κ</italic>=3 based on hamming distance for the following configurations: our system, open and closed systems with multi-hash, as well as open and closed systems with uni-hash.</p>
            
      <fig id="figure8" position="float">
        <label>Figure 8</label>
        <caption>
          <p>Example of constructing temporal sequence with target disease in red and its vector representation.</p>
        </caption>
        <graphic xlink:href="medinform_v6i2e20_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Description of five datasets from Multiparameter Intelligent Monitoring in Intensive Care-III (MIMIC-III) database.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="500"/>
          <col width="250"/>
          <col width="250"/>
          <thead>
            <tr valign="top">
              <td>Disease</td>
              <td>Data size (negative or positive)</td>
              <td>Dimension (<italic>d</italic><sub>k</sub>, <italic>k</italic>=1,…,6)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Disorders of lipoid metabolism</td>
              <td>4546/2990</td>
              <td>(12,204,814,1338,262,170)</td>
            </tr>
            <tr valign="top">
              <td>Hypertensive chronic kidney disease</td>
              <td>5652/1884</td>
              <td>(12,204,822,1338,266,169)</td>
            </tr>
            <tr valign="top">
              <td>Cardiac dysrhythmias</td>
              <td>3878/3658</td>
              <td>(12,204,817,1338,263,169)</td>
            </tr>
            <tr valign="top">
              <td>Heart failure</td>
              <td>4167/3369</td>
              <td>(12,204,819,1338,265,169)</td>
            </tr>
            <tr valign="top">
              <td>Acute renal failure</td>
              <td>4182/3354</td>
              <td>(12,204,809,1338,268,170)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Averaged area under the curve (AUC) with SD of <italic>κ</italic>-NN (<italic>κ</italic>=3) based on hamming distance from our, open and closed systems with multi-hash approach and from open and closed systems with uni-hash approach and based on cosine distance from open and closed systems.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="160"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <thead>
            <tr valign="top">
              <td>Disease</td>
              <td colspan="3">Multi-hash</td>
              <td colspan="2">Uni-hash</td>
              <td colspan="2">Baseline</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>Our system, Averaged AUC (SD)</td>
              <td>Open system, Averaged AUC (SD)</td>
              <td>Closed system, Averaged AUC (SD)</td>
              <td>Open system, Averaged AUC (SD)</td>
              <td>Closed system, Averaged AUC (SD)</td>
              <td>Open system, Averaged AUC (SD)</td>
              <td>Closed system, Averaged AUC (SD)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Disorders of lipoid metabolism</td>
              <td>0.9330 (0.0086)</td>
              <td>0.9343 (0.0125)</td>
              <td>0.9002 (0.0285)</td>
              <td>0.9159 (0.0255)</td>
              <td>0.8486 (0.0271)</td>
              <td>0.8079 (0.0222)</td>
              <td>0.7945 (0.0308)</td>
            </tr>
            <tr valign="top">
              <td>Hypertensive chronic kidney disease</td>
              <td>0.9078 (0.0346)</td>
              <td>0.9283 (0.0432)</td>
              <td>0.8538 (0.0421)</td>
              <td>0.9270 (0.0350)</td>
              <td>0.8501 (0.0305)</td>
              <td>0.7823 (0.0261)</td>
              <td>0.7762 (0.0262)</td>
            </tr>
            <tr valign="top">
              <td>Cardiac dysrhythmias</td>
              <td>0.9135 (0.0287)</td>
              <td>0.9368 (0.0492)</td>
              <td>0.8833 (0.0397)</td>
              <td>0.9072 (0.0414)</td>
              <td>0.8236 (0.0328)</td>
              <td>0.7695 (0.0151)</td>
              <td>0.7340 (0.0343)</td>
            </tr>
            <tr valign="top">
              <td>Heart failure</td>
              <td>0.9058 (0.0282)</td>
              <td>0.9351 (0.0326)</td>
              <td>0.8798 (0.0414)</td>
              <td>0.9089 (0.0376)</td>
              <td>0.8471 (0.0248)</td>
              <td>0.7986 (0.0292)</td>
              <td>0.7733 (0.0421)</td>
            </tr>
            <tr valign="top">
              <td>Acute renal failure</td>
              <td>0.9169 (0.0397)</td>
              <td>0.9477 (0.0374)</td>
              <td>0.8637 (0.0320)</td>
              <td>0.8821 (0.0403)</td>
              <td>0.7929 (0.0378)</td>
              <td>0.7434 (0.0380)</td>
              <td>0.7289 (0.0341)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      
            <fig id="figure9" position="float">
        <label>Figure 9</label>
        <caption>
          <p>Averaged area under the curve (AUC) of κ-NN (κ=3) for heart failure based on hamming distance from our, open and closed systems with multi-hash approach.</p>
        </caption>
        <graphic xlink:href="medinform_v6i2e20_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>

      <p>Additionally, <xref ref-type="table" rid="table2">Table 2</xref> presents a baseline result based on cosine distance obtained from open and closed systems, which has the highest AUC among baseline results. We note that the results for closed systems are the average of three sites.</p>
      <p><xref ref-type="fig" rid="figure9">Figure 9</xref> shows the comparison for heart failure of our, open, and closed systems with multi-hash approach as an example. The prediction performance of our system is moderate between those of open and closed systems. It is encouraging that our system approaches open system without sharing local data. <xref ref-type="fig" rid="figure10">Figure 10</xref> also shows the comparison result for heart failure of our system with multi-hash approach and open and closed system with uni-hash approach. We can see the superior performance of our system over closed system as before. However, in this case, our system is comparable with open system and even outperformed it for three diseases; this may be because the multi-hash approach is more effective than the uni-hash approach at constructing context-specific hash codes. <xref ref-type="fig" rid="figure11">Figure 11</xref> shows the results of our system with different <italic>κ</italic>. The detailed results with different <italic>κ</italic> are presented in <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>. AUC generally increases as <italic>κ</italic> increases.</p>
      <p>However, in real life, different sites have a different specialty and have a different distribution in patient data. To see how our platform works in random and skewed distribution, we differentiated the ratio of samples having negative and positive classes by site. We assumed that three sites, respectively, have 10%, 30%, and 50% of positive class for five diseases. Note that all other settings including the number of sites and patients for each site, projection dimensions, and parameters were set the same as before to test only the change originated from the class imbalance and for experimental convenience; we omitted the uni-hash approach, which is expected to have the similar trend about multi-hash approach to that shown in <xref ref-type="table" rid="table2">Table 2</xref>. <xref ref-type="table" rid="table3">Table 3</xref> shows the averaged AUC results from <italic>κ</italic>-NN with <italic>κ</italic>=3 based on hamming distance for our system, open and closed systems with multi-hash, and based on cosine distance for open and closed systems with raw data. For more elaborate comparison, F1, sensitivity (ie, TPR), and specificity (ie, the number of true negatives divided by the sum of true negatives and false positives) [<xref ref-type="bibr" rid="ref58">58</xref>] were also measured along with AUC (<xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>); F1 is the harmonic mean of precision and recall where it reaches its best value at 1 and worst at 0. It can be interpreted as weighted average of the precision (ie, the number of true positives divided by the sum of true positives and false positives) and recall (ie, TPR, sensitivity) with their equal contribution.</p>    
      
      <fig id="figure10" position="float">
        <label>Figure 10</label>
        <caption>
          <p>Averaged area under the curve (AUC) of κ-NN (κ=3) for heart failure based on hamming distance from our system with multi-hash approach and open and closed systems with uni-hash approach.</p>
        </caption>
        <graphic xlink:href="medinform_v6i2e20_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      
      <fig id="figure11" position="float">
        <label>Figure 11</label>
        <caption>
          <p>Averaged area under the curve (AUC) of κ-NN with different κ (κ=1,3,9) for five diseases from our system.</p>
        </caption>
        <graphic xlink:href="medinform_v6i2e20_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Averaged area under the curve (AUC) with SD of <italic>κ</italic>-NN (<italic>κ</italic>=3) based on hamming distance from our, open, and closed systems with multi-hash approach and based on cosine distance from open and closed systems.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="180"/>
          <col width="140"/>
          <col width="140"/>
          <col width="140"/>
          <col width="140"/>
          <col width="140"/>
          <thead>
            <tr valign="top">
              <td>Disease</td>
              <td colspan="3">Multi-hash</td>
              <td colspan="2">Baseline</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>Our system, AUC (SD)</td>
              <td>Open system, AUC (SD)</td>
              <td>Closed system, AUC (SD)</td>
              <td>Open system, AUC (SD)</td>
              <td>Closed system, AUC (SD)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Disorders of lipoid metabolism</td>
              <td>0.8056 (0.0386)</td>
              <td>0.8309 (0.0412)</td>
              <td>0.7629 (0.0295)</td>
              <td>0.7525 (0.0212)</td>
              <td>0.7104 (0.0187)</td>
            </tr>
            <tr valign="top">
              <td>Hypertensive chronic kidney disease</td>
              <td>0.7637 (0.0367)</td>
              <td>0.7924 (0.0209)</td>
              <td>0.7275 (0.0266)</td>
              <td>0.7296 (0.0215)</td>
              <td>0.7141 (0.0207)</td>
            </tr>
            <tr valign="top">
              <td>Cardiac dysrhythmias</td>
              <td>0.7840 (0.0301)</td>
              <td>0.7937 (0.0228)</td>
              <td>0.7659 (0.0223)</td>
              <td>0.7638 (0.0198)</td>
              <td>0.7385 (0.0188)</td>
            </tr>
            <tr valign="top">
              <td>Heart failure</td>
              <td>0.8287 (0.0283)</td>
              <td>0.8832 (0.0278)</td>
              <td>0.7459 (0.0331)</td>
              <td>0.7735 (0.0206)</td>
              <td>0.6778 (0.0213)</td>
            </tr>
            <tr valign="top">
              <td>Acute renal failure</td>
              <td>0.8239 (0.0326)</td>
              <td>0.8704 (0.0335)</td>
              <td>0.7558 (0.0263)</td>
              <td>0.7304 (0.0218)</td>
              <td>0.7415 (0.0225)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>Averaged execution time of each basic cryptographic operation for five diseases.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="160"/>
          <col width="140"/>
          <col width="150"/>
          <col width="140"/>
          <col width="140"/>
          <col width="140"/>
          <thead>
            <tr valign="top">
              <td>Operation</td>
              <td colspan="5">Time (seconds)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>Disorders of lipoid metabolism</td>
              <td>Hypertensive chronic kidney disease</td>
              <td>Cardiac dysrhythmias</td>
              <td>Heart failure</td>
              <td>Acute renal failure</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Homomorphic encryption</td>
              <td>1.9</td>
              <td>2.2</td>
              <td>2.2</td>
              <td>2.3</td>
              <td>2.2</td>
            </tr>
            <tr valign="top">
              <td>Initialization</td>
              <td>5.2</td>
              <td>6.3</td>
              <td>5.8</td>
              <td>6.5</td>
              <td>6.0</td>
            </tr>
            <tr valign="top">
              <td>Comparison</td>
              <td>994.2</td>
              <td>1243.9</td>
              <td>1067.1</td>
              <td>1131.7</td>
              <td>1066.5</td>
            </tr>
            <tr valign="top">
              <td>Homomorphic decryption</td>
              <td>0.4</td>
              <td>0.4</td>
              <td>0.4</td>
              <td>0.4</td>
              <td>0.4</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      
                        <p>Most of the results can be interpreted in the same context as <xref ref-type="table" rid="table2">Table 2</xref>, but it should be noted that the degree of performance degradation in our system (~13%) is greater than that at baseline (~5%). Given these results from open and closed systems, as well as our system with multi-hash approach, accuracy might be lost because of the instability caused by updating weights {<italic>W</italic><sub>k</sub>}<sup>K</sup><sub>k=1</sub> with information from skewed distributions. However, it is encouraging that sensitivity is obtained stably in multi-hash approach rather than baseline. Sensitivity is an important measure in medical analysis because it is much more dangerous to diagnose that the disease has not occurred even though it has already developed than the opposite case. The fact that F1 is significantly larger is consistent with this. Therefore, considering all the results, we believe that our system is a useful alternative.</p>
      <p>Next, we performed secure data aggregation and data comparison among different sites in a federated setting by which each site is able to retrieve its hamming distance under certain criteria in a privacy-preserving manner. In our experiments with balanced data, each row has 52 bits (hash code), and a 128-bit encryption key is used for homomorphic encryption. We measured the execution time of some key cryptographic operations in a workstation with an Intel 2.5 GHz CPU, where all the results are averaged over five-fold CV of total time for six cases (three test sets by two training sets). The execution time of each basic cryptographic operation has been profiled and shown in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
      <p>We confirmed that the calculated similarities across sites are the same when exchanging raw {<italic>H</italic><sup>i</sup><sub>k</sub>}<sup>K</sup><sub>k=1</sub> directly with each other (ie, without homomorphic encryption) or exchanging encrypted {<italic>H</italic><sup>i</sup><sub>k</sub>}<sup>K</sup><sub>k=1</sub> (ie, with homomorphic encryption) with each other. Therefore, the results after homomorphic encryption were obtained exactly the same as the results in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref> and <xref ref-type="fig" rid="figure9">Figures 9</xref> to <xref ref-type="fig" rid="figure11">11</xref> without any privacy leakage.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>There are several limitations in the proposed framework. When learning hash functions, the assumption is that each site has common feature events that should be needed. However, different sites, for example, hospitals, may have different event types, and additionally, the notation system for each event type cannot be standardized except for diagnoses, symptoms, and conditions that are based on ICD-9. Even though we have the limitation of common feature events, we believe that our methodology can be still useful for cooperating hospitals eager to find similar patients across sites at the point of care. We are planning to develop a new and more practical approach to relax this assumption.</p>
        <p>Basically, our system works better when all the participants have similar distributions. However, we have confirmed through the imbalanced class experiment that our system still works well with different distributions, albeit at the cost of some performance degradation. We will address the more general imbalanced data problem in future work.</p>
        <p>Next, even if we have computational benefits by adopting a multi-hash approach compared with a uni-hash approach, and the computational complexity is not prohibitive in practice, a technical challenge still remains in scalable hash function learning when the sample size and the feature dimensionality are large. This is because the complexity for inverting Hessian matrices in our algorithm is affected by the sample size and the feature dimensionality. This is an expensive operation of time complexity and requires a lot of memory. We can solve this problem by using parallelization or graphics processing units or utilizing a gradient descent method that replaces the inversion of Hessian matrix with a constant or a variable varying with the iteration number.</p>
        <p>We demonstrated the feasibility of privacy-preserving similarity search, and the experiments were conducted on a single machine (with different processes) to serve as a proof of concept. In practice, we need to deploy the algorithm in multiple computers, and that is a trivial task. We will execute this algorithm using secure multiparty computation such as in the Secure Multi-pArty Computation Grid LOgistic REgression [<xref ref-type="bibr" rid="ref59">59</xref>] in future work.</p>
        <p>We have also listed several limitations to consider for more elaborate future work. When constructing temporal sequences, it assumes the sequence events are sampled at the same frequency for simplicity, which means the temporal effect has not been represented in this work. We roughly determined parameters of projection dimension and decay factor, which might not be optimal. In our experiment, we used 3-digit ICD to show a proof of concept, but the granularity of the ICD code will affect the performance in real applications, especially if the interest is related to the rare ones.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We proposed a federated patient hashing framework and developed a privacy-preserving patient similarity learning algorithm. This technique allows hash codes to be learned for each patient, reflecting information from different sites without sharing patient-level data. Using the MIMIC-III database, we conducted experiments to demonstrate the accuracy and usability of the proposed algorithm. By utilizing the multi-hash approach, our algorithm obtained more usable and practical results than the uni-hash approach. To avoid privacy leakage in patient similarity search, we also applied homomorphic encryption, which can calculate the hamming distance without transmitting hash codes. As a result, we confirmed the same results without any privacy leakage.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>Privacy-preserving patient representation learning in a federated setting.</p>
        <media xlink:href="medinform_v6i2e20_app1.pdf" xlink:title="PDF File (Adobe PDF File), 113KB"/>
      </app>
      <app id="app2">
        <title>Multimedia Appendix 2</title>
        <p>Prediction performance of balanced class datasets.</p>
        <media xlink:href="medinform_v6i2e20_app2.pdf" xlink:title="PDF File (Adobe PDF File), 130KB"/>
      </app>
      <app id="app3">
        <title>Multimedia Appendix 3</title>
        <p>Prediction performance of imbalanced class datasets.</p>
        <media xlink:href="medinform_v6i2e20_app3.pdf" xlink:title="PDF File (Adobe PDF File), 33KB"/>
      </app>
      <app id="app4">
        <title>Multimedia Appendix 4</title>
        <p>Prediction performance of balanced class datasets.</p>
        <media xlink:href="medinform_v6i2e20_app4.pdf" xlink:title="PDF File (Adobe PDF File), 41KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CS</term>
          <def>
            <p>cloud server</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CSP</term>
          <def>
            <p>crypto service provider</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CV</term>
          <def>
            <p>cross validation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DC</term>
          <def>
            <p>data custodian</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ICD-9</term>
          <def>
            <p>International Classification of Diseases, 9th revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MIMIC-III</term>
          <def>
            <p>Multiparameter Intelligent Monitoring in Intensive Care-III</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">TPR</term>
          <def>
            <p>true positive rate</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by NHGRI grants R00HG008175, R01HG008802, and R01HG007078, NIGMS R01GM114612, NLM grants R00LM011392, R21LM012060, and NHLBI grant U54HL108460. The work of FW is supported by NSF IIS-1650723 and IIS-1716432.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kononenko</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <article-title>Machine learning for medical diagnosis: history, state of the art and perspective</article-title>
        <source>Artif Intell Med</source>  
        <year>2001</year>  
        <month>08</month>  
        <volume>23</volume>  
        <issue>1</issue>  
        <fpage>89</fpage>  
        <lpage>109</lpage>  
        <pub-id pub-id-type="medline">11470218</pub-id>
        <pub-id pub-id-type="pii">S0933-3657(01)00077-X</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Roque</surname>
            <given-names>FS</given-names>
          </name>
          <name name-style="western">
            <surname>Jensen</surname>
            <given-names>PB</given-names>
          </name>
          <name name-style="western">
            <surname>Schmock</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Dalgaard</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Andreatta</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Hansen</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Søeby</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Bredkjær</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Juul</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Werge</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Jensen</surname>
            <given-names>LJ</given-names>
          </name>
          <name name-style="western">
            <surname>Brunak</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Using electronic patient records to discover disease correlations and stratify patient cohorts</article-title>
        <source>PLoS Comput Biol</source>  
        <year>2011</year>  
        <month>08</month>  
        <volume>7</volume>  
        <issue>8</issue>  
        <fpage>e1002141</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pcbi.1002141"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002141</pub-id>
        <pub-id pub-id-type="medline">21901084</pub-id>
        <pub-id pub-id-type="pii">PCOMPBIOL-D-11-00196</pub-id>
        <pub-id pub-id-type="pmcid">PMC3161904</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Savage</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Better medicine through machine learning</article-title>
        <source>Commun ACM</source>  
        <year>2012</year>  
        <month>01</month>  
        <day>01</day>  
        <volume>55</volume>  
        <issue>1</issue>  
        <fpage>17</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://paperpile.com/b/4F2MYV/dbIe"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1145/2063176.2063182</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Luo</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Markatou</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Ebadollahi</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Steinhubl</surname>
            <given-names>SE</given-names>
          </name>
          <name name-style="western">
            <surname>Daar</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Stewart</surname>
            <given-names>WF</given-names>
          </name>
        </person-group>
        <article-title>Combining knowledge and data driven insights for identifying risk factors using electronic health records</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2012</year>  
        <volume>2012</volume>  
        <fpage>901</fpage>  
        <lpage>10</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23304365"/>
        </comment>  
        <pub-id pub-id-type="medline">23304365</pub-id>
        <pub-id pub-id-type="pmcid">PMC3540578</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Visweswaran</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Angus</surname>
            <given-names>DC</given-names>
          </name>
          <name name-style="western">
            <surname>Hsieh</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Weissfeld</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Yealy</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Cooper</surname>
            <given-names>GF</given-names>
          </name>
        </person-group>
        <article-title>Learning patient-specific predictive models from clinical data</article-title>
        <source>J Biomed Inform</source>  
        <year>2010</year>  
        <month>10</month>  
        <volume>43</volume>  
        <issue>5</issue>  
        <fpage>669</fpage>  
        <lpage>85</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(10)00056-0"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2010.04.009</pub-id>
        <pub-id pub-id-type="medline">20450985</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(10)00056-0</pub-id>
        <pub-id pub-id-type="pmcid">PMC2933959</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Roy</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Stewart</surname>
            <given-names>WF</given-names>
          </name>
        </person-group>
        <article-title>Prediction modeling using EHR data: challenges, strategies, and a comparison of machine learning approaches</article-title>
        <source>Med Care</source>  
        <year>2010</year>  
        <month>06</month>  
        <volume>48</volume>  
        <issue>6 Suppl</issue>  
        <fpage>S106</fpage>  
        <lpage>13</lpage>  
        <pub-id pub-id-type="doi">10.1097/MLR.0b013e3181de9e17</pub-id>
        <pub-id pub-id-type="medline">20473190</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ebadollahi</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Gotz</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Sow</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Neti</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Predicting patient's trajectory of physiological data using temporal trends in similar patients: a system for near-term prognostics</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2010</year>  
        <month>11</month>  
        <day>13</day>  
        <volume>2010</volume>  
        <fpage>192</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21346967"/>
        </comment>  
        <pub-id pub-id-type="medline">21346967</pub-id>
        <pub-id pub-id-type="pmcid">PMC3041306</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jensen</surname>
            <given-names>PB</given-names>
          </name>
          <name name-style="western">
            <surname>Jensen</surname>
            <given-names>LJ</given-names>
          </name>
          <name name-style="western">
            <surname>Brunak</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
        <source>Nat Rev Genet</source>  
        <year>2012</year>  
        <month>05</month>  
        <day>02</day>  
        <volume>13</volume>  
        <issue>6</issue>  
        <fpage>395</fpage>  
        <lpage>405</lpage>  
        <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
        <pub-id pub-id-type="medline">22549152</pub-id>
        <pub-id pub-id-type="pii">nrg3208</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Saria</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Rajani</surname>
            <given-names>AK</given-names>
          </name>
          <name name-style="western">
            <surname>Gould</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Koller</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Penn</surname>
            <given-names>AA</given-names>
          </name>
        </person-group>
        <article-title>Integration of early physiological responses predicts later illness severity in preterm infants</article-title>
        <source>Sci Transl Med</source>  
        <year>2010</year>  
        <month>09</month>  
        <day>08</day>  
        <volume>2</volume>  
        <issue>48</issue>  
        <fpage>48ra65</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://stm.sciencemag.org/cgi/pmidlookup?view=long&amp;pmid=20826840"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1126/scitranslmed.3001304</pub-id>
        <pub-id pub-id-type="medline">20826840</pub-id>
        <pub-id pub-id-type="pii">2/48/48ra65</pub-id>
        <pub-id pub-id-type="pmcid">PMC3564961</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Sow</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ebadollahi</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>A system for mining temporal physiological data streams for advanced prognostic decision support</article-title>
        <conf-name>IEEE 10th International Conference on Data Mining (ICDM)</conf-name>
        <conf-date>December 13-17, 2010</conf-date>
        <conf-loc>Sydney, NSW, Australia</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://ieeexplore.ieee.org/document/5694085/"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bennett</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Doub</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Selove</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>EHRs connect research and practice: Where predictive modeling, artificial intelligence, and clinical decision support intersect</article-title>
        <source>Health Policy Technol</source>  
        <year>2012</year>  
        <month>06</month>  
        <volume>1</volume>  
        <issue>2</issue>  
        <fpage>105</fpage>  
        <lpage>114</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.hlpt.2012.03.001"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.hlpt.2012.03.001</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Greengard</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>A new model for healthcare</article-title>
        <source>Commun ACM</source>  
        <year>2013</year>  
        <month>02</month>  
        <day>01</day>  
        <volume>56</volume>  
        <issue>2</issue>  
        <fpage>17</fpage>  
        <lpage>19</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://doi.org/10.1145/2408776.2408783"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1145/2408776.2408783</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ramakrishnan</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Hanauer</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Keller</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Mining electronic health records</article-title>
        <source>Computer</source>  
        <year>2010</year>  
        <month>10</month>  
        <volume>43</volume>  
        <issue>10</issue>  
        <fpage>77</fpage>  
        <lpage>81</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://doi.org/10.1109/mc.2010.292"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1109/mc.2010.292</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Romano</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Stafford</surname>
            <given-names>RS</given-names>
          </name>
        </person-group>
        <article-title>Electronic health records and clinical decision support systems: impact on national ambulatory care quality</article-title>
        <source>Arch Intern Med</source>  
        <year>2011</year>  
        <month>05</month>  
        <day>23</day>  
        <volume>171</volume>  
        <issue>10</issue>  
        <fpage>897</fpage>  
        <lpage>903</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21263077"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1001/archinternmed.2010.527</pub-id>
        <pub-id pub-id-type="medline">21263077</pub-id>
        <pub-id pub-id-type="pii">archinternmed.2010.527</pub-id>
        <pub-id pub-id-type="pmcid">PMC4016790</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Newton</surname>
            <given-names>KM</given-names>
          </name>
          <name name-style="western">
            <surname>Peissig</surname>
            <given-names>PL</given-names>
          </name>
          <name name-style="western">
            <surname>Kho</surname>
            <given-names>AN</given-names>
          </name>
          <name name-style="western">
            <surname>Bielinski</surname>
            <given-names>SJ</given-names>
          </name>
          <name name-style="western">
            <surname>Berg</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Choudhary</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Basford</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Chute</surname>
            <given-names>CG</given-names>
          </name>
          <name name-style="western">
            <surname>Kullo</surname>
            <given-names>IJ</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Pacheco</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Rasmussen</surname>
            <given-names>LV</given-names>
          </name>
          <name name-style="western">
            <surname>Spangler</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Denny</surname>
            <given-names>JC</given-names>
          </name>
        </person-group>
        <article-title>Validation of electronic medical record-based phenotyping algorithms: results and lessons learned from the eMERGE network</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2013</year>  
        <month>06</month>  
        <volume>20</volume>  
        <issue>e1</issue>  
        <fpage>e147</fpage>  
        <lpage>54</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23531748"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2012-000896</pub-id>
        <pub-id pub-id-type="medline">23531748</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2012-000896</pub-id>
        <pub-id pub-id-type="pmcid">PMC3715338</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Denny</surname>
            <given-names>JC</given-names>
          </name>
          <name name-style="western">
            <surname>Bastarache</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Ritchie</surname>
            <given-names>MD</given-names>
          </name>
          <name name-style="western">
            <surname>Carroll</surname>
            <given-names>RJ</given-names>
          </name>
          <name name-style="western">
            <surname>Zink</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Mosley</surname>
            <given-names>JD</given-names>
          </name>
          <name name-style="western">
            <surname>Field</surname>
            <given-names>JR</given-names>
          </name>
          <name name-style="western">
            <surname>Pulley</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Ramirez</surname>
            <given-names>AH</given-names>
          </name>
          <name name-style="western">
            <surname>Bowton</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Basford</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Carrell</surname>
            <given-names>DS</given-names>
          </name>
          <name name-style="western">
            <surname>Peissig</surname>
            <given-names>PL</given-names>
          </name>
          <name name-style="western">
            <surname>Kho</surname>
            <given-names>AN</given-names>
          </name>
          <name name-style="western">
            <surname>Pacheco</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Rasmussen</surname>
            <given-names>LV</given-names>
          </name>
          <name name-style="western">
            <surname>Crosslin</surname>
            <given-names>DR</given-names>
          </name>
          <name name-style="western">
            <surname>Crane</surname>
            <given-names>PK</given-names>
          </name>
          <name name-style="western">
            <surname>Pathak</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Bielinski</surname>
            <given-names>SJ</given-names>
          </name>
          <name name-style="western">
            <surname>Pendergrass</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Hindorff</surname>
            <given-names>LA</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Manolio</surname>
            <given-names>TA</given-names>
          </name>
          <name name-style="western">
            <surname>Chute</surname>
            <given-names>CG</given-names>
          </name>
          <name name-style="western">
            <surname>Chisholm</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Larson</surname>
            <given-names>EB</given-names>
          </name>
          <name name-style="western">
            <surname>Jarvik</surname>
            <given-names>GP</given-names>
          </name>
          <name name-style="western">
            <surname>Brilliant</surname>
            <given-names>MH</given-names>
          </name>
          <name name-style="western">
            <surname>McCarty</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Kullo</surname>
            <given-names>IJ</given-names>
          </name>
          <name name-style="western">
            <surname>Haines</surname>
            <given-names>JL</given-names>
          </name>
          <name name-style="western">
            <surname>Crawford</surname>
            <given-names>DC</given-names>
          </name>
          <name name-style="western">
            <surname>Masys</surname>
            <given-names>DR</given-names>
          </name>
          <name name-style="western">
            <surname>Roden</surname>
            <given-names>DM</given-names>
          </name>
        </person-group>
        <article-title>Systematic comparison of phenome-wide association study of electronic medical record data and genome-wide association study data</article-title>
        <source>Nat Biotechnol</source>  
        <year>2013</year>  
        <month>12</month>  
        <volume>31</volume>  
        <issue>12</issue>  
        <fpage>1102</fpage>  
        <lpage>10</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24270849"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1038/nbt.2749</pub-id>
        <pub-id pub-id-type="medline">24270849</pub-id>
        <pub-id pub-id-type="pii">nbt.2749</pub-id>
        <pub-id pub-id-type="pmcid">PMC3969265</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Platt</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Carnahan</surname>
            <given-names>RM</given-names>
          </name>
          <name name-style="western">
            <surname>Brown</surname>
            <given-names>JS</given-names>
          </name>
          <name name-style="western">
            <surname>Chrischilles</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Curtis</surname>
            <given-names>LH</given-names>
          </name>
          <name name-style="western">
            <surname>Hennessy</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Nelson</surname>
            <given-names>JC</given-names>
          </name>
          <name name-style="western">
            <surname>Racoosin</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Robb</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Schneeweiss</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Toh</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Weiner</surname>
            <given-names>MG</given-names>
          </name>
        </person-group>
        <article-title>The U.S. Food and Drug Administration's Mini-Sentinel program: status and direction</article-title>
        <source>Pharmacoepidemiol Drug Saf</source>  
        <year>2012</year>  
        <volume>21 Suppl 1</volume>  
        <fpage>1</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://doi.org/10.1002/pds.2343"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1002/pds.2343</pub-id>
        <pub-id pub-id-type="medline">22262586</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hripcsak</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Duke</surname>
            <given-names>JD</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>NH</given-names>
          </name>
          <name name-style="western">
            <surname>Reich</surname>
            <given-names>CG</given-names>
          </name>
          <name name-style="western">
            <surname>Huser</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Schuemie</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Suchard</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Park</surname>
            <given-names>RW</given-names>
          </name>
          <name name-style="western">
            <surname>Wong</surname>
            <given-names>IC</given-names>
          </name>
          <name name-style="western">
            <surname>Rijnbeek</surname>
            <given-names>PR</given-names>
          </name>
          <name name-style="western">
            <surname>van der Lei</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Pratt</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Norén</surname>
            <given-names>GN</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>YC</given-names>
          </name>
          <name name-style="western">
            <surname>Stang</surname>
            <given-names>PE</given-names>
          </name>
          <name name-style="western">
            <surname>Madigan</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Ryan</surname>
            <given-names>PB</given-names>
          </name>
        </person-group>
        <article-title>Observational Health Data Sciences and Informatics (OHDSI): opportunities for observational researchers</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2015</year>  
        <volume>216</volume>  
        <fpage>574</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26262116"/>
        </comment>  
        <pub-id pub-id-type="medline">26262116</pub-id>
        <pub-id pub-id-type="pmcid">PMC4815923</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fleurence</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Curtis</surname>
            <given-names>LH</given-names>
          </name>
          <name name-style="western">
            <surname>Califf</surname>
            <given-names>RM</given-names>
          </name>
          <name name-style="western">
            <surname>Platt</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Selby</surname>
            <given-names>JV</given-names>
          </name>
          <name name-style="western">
            <surname>Brown</surname>
            <given-names>JS</given-names>
          </name>
        </person-group>
        <article-title>Launching PCORnet, a national patient-centered clinical research network</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2014</year>  
        <volume>21</volume>  
        <issue>4</issue>  
        <fpage>578</fpage>  
        <lpage>82</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24821743"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2014-002747</pub-id>
        <pub-id pub-id-type="medline">24821743</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2014-002747</pub-id>
        <pub-id pub-id-type="pmcid">PMC4078292</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Weber</surname>
            <given-names>GM</given-names>
          </name>
          <name name-style="western">
            <surname>Murphy</surname>
            <given-names>SN</given-names>
          </name>
          <name name-style="western">
            <surname>McMurry</surname>
            <given-names>AJ</given-names>
          </name>
          <name name-style="western">
            <surname>Macfadden</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Nigrin</surname>
            <given-names>DJ</given-names>
          </name>
          <name name-style="western">
            <surname>Churchill</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Kohane</surname>
            <given-names>IS</given-names>
          </name>
        </person-group>
        <article-title>The Shared Health Research Information Network (SHRINE): a prototype federated query tool for clinical data repositories</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2009</year>  
        <volume>16</volume>  
        <issue>5</issue>  
        <fpage>624</fpage>  
        <lpage>30</lpage>  
        <pub-id pub-id-type="doi">10.1197/jamia.M3191</pub-id>
        <pub-id pub-id-type="medline">19567788</pub-id>
        <pub-id pub-id-type="pii">M3191</pub-id>
        <pub-id pub-id-type="pmcid">PMC2744712</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ng</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Ghoting</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Steinhubl</surname>
            <given-names>SR</given-names>
          </name>
          <name name-style="western">
            <surname>Stewart</surname>
            <given-names>WF</given-names>
          </name>
          <name name-style="western">
            <surname>Malin</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>PARAMO: a PARAllel predictive MOdeling platform for healthcare analytic research using electronic health records</article-title>
        <source>J Biomed Inform</source>  
        <year>2014</year>  
        <month>04</month>  
        <volume>48</volume>  
        <fpage>160</fpage>  
        <lpage>70</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00203-7"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2013.12.012</pub-id>
        <pub-id pub-id-type="medline">24370496</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(13)00203-7</pub-id>
        <pub-id pub-id-type="pmcid">PMC4075460</pub-id></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gallego</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Walter</surname>
            <given-names>SR</given-names>
          </name>
          <name name-style="western">
            <surname>Day</surname>
            <given-names>RO</given-names>
          </name>
          <name name-style="western">
            <surname>Dunn</surname>
            <given-names>AG</given-names>
          </name>
          <name name-style="western">
            <surname>Sivaraman</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Longhurst</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Coiera</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Bringing cohort studies to the bedside: framework for a 'green button' to support clinical decision-making</article-title>
        <source>J Comp Eff Res</source>  
        <year>2015</year>  
        <month>05</month>  
        <day>11</day>  
        <fpage>1</fpage>  
        <lpage>7</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.futuremedicine.com/doi/10.2217/cer.15.12"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2217/cer.15.12</pub-id>
        <pub-id pub-id-type="medline">25959863</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ohno-Machado</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Grid Binary LOgistic REgression (GLORE): building shared models without sharing data</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2012</year>  
        <volume>19</volume>  
        <issue>5</issue>  
        <fpage>758</fpage>  
        <lpage>64</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22511014"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2012-000862</pub-id>
        <pub-id pub-id-type="medline">22511014</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2012-000862</pub-id>
        <pub-id pub-id-type="pmcid">PMC3422844</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Vaidya</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving SVM using nonlinear kernels on horizontally partitioned data</article-title>
          <conf-name>The ACM symposium on Applied computing</conf-name>
          <conf-date>April 23-27, 2006</conf-date>
          <conf-loc>Dijon, France</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Vaidya</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>X</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Ng</surname>
            <given-names>WK</given-names>
          </name>
          <name name-style="western">
            <surname>Kitsuregawa</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Privacy-preserving SVM classification on vertically partitioned data</article-title>
        <source>Advances in Knowledge Discovery and Data Mining. PAKDD 2006. Lecture Notes in Computer Science, vol 3918</source>  
        <year>2006</year>  
        <publisher-loc>Berlin, Heidelberg</publisher-loc>
        <publisher-name>Springer</publisher-name>
        <fpage>647</fpage>  
        <lpage>656</lpage> </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>CL</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ji</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Xiong</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Ohno-Machado</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>WebDISCO: a web service for distributed cox model learning without patient-level data sharing</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2015</year>  
        <month>11</month>  
        <volume>22</volume>  
        <issue>6</issue>  
        <fpage>1212</fpage>  
        <lpage>9</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26159465"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1093/jamia/ocv083</pub-id>
        <pub-id pub-id-type="medline">26159465</pub-id>
        <pub-id pub-id-type="pii">ocv083</pub-id>
        <pub-id pub-id-type="pmcid">PMC5009917</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ebadollahi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Integrating Distance Metrics Learned from Multiple Experts and its Application in Patient Similarity Assessment</article-title>
          <conf-name>The SIAM International Conference on Data Mining</conf-name>
          <conf-date>April 28, 2011</conf-date>
          <conf-loc>Mesa, Arizona</conf-loc>
          <pub-id pub-id-type="doi">10.1137/1.9781611972818.6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Medical prognosis based on patient similarity and expert feedback</article-title>
          <conf-name>21st International Conference on Pattern Recognition (ICPR) 2012</conf-name>
          <conf-date>November 11-15, 2012</conf-date>
          <conf-loc>Tsukuba, Japan</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ebadollahi</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Supervised patient similarity measure of heterogeneous patient records</article-title>
        <source>SIGKDD Explor Newsl</source>  
        <year>2012</year>  
        <month>12</month>  
        <day>10</day>  
        <volume>14</volume>  
        <issue>1</issue>  
        <fpage>16</fpage>  
        <lpage>24</lpage>  
        <pub-id pub-id-type="doi">10.1145/2408736.2408740</pub-id></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ebadollahi</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Composite distance metric integration by leveraging multiple experts' inputs and its application in patient similarity assessment</article-title>
        <source>Stat Anal Data Min</source>  
        <year>2012</year>  
        <month>02</month>  
        <day>17</day>  
        <volume>5</volume>  
        <issue>1</issue>  
        <fpage>54</fpage>  
        <lpage>69</lpage>  
        <pub-id pub-id-type="doi">10.1002/sam.11135</pub-id></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gentry</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Fully homomorphic encryption using ideal lattices</article-title>
          <conf-name>Proceedings of the 41st Annual ACM Symposium on Theory of computing</conf-name>
          <conf-date>May 31-June 2, 2009</conf-date>
          <conf-loc>Bethesda, MD</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1536414.1536440</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bloom</surname>
            <given-names>BH</given-names>
          </name>
        </person-group>
        <article-title>Space/time trade-offs in hash coding with allowable errors</article-title>
        <source>Commun ACM</source>  
        <year>1970</year>  
        <month>07</month>  
        <volume>13</volume>  
        <issue>7</issue>  
        <fpage>422</fpage>  
        <lpage>426</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dmod.eu/deca/ft_gateway.cfm.pdf"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1145/362686.362692</pub-id></nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Schneier</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <year>2004</year>  
        <month>08</month>  
        <access-date>2018-03-02</access-date>
        <comment>Schneier on Security: Cryptanalysis of MD5 and SHA: Time for a New Standard 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.schneier.com/essays/archives/2004/08/cryptanalysis_of_md5.html">https://www.schneier.com/essays/archives/2004/08/cryptanalysis_of_md5.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xcTiZQHw"/></comment> </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>HT</given-names>
          </name>
          <name name-style="western">
            <surname>Song</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ji</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <year>2014</year>  
        <month>08</month>  
        <access-date>2018-03-03</access-date>
        <comment>Hashing for Similarity Search: A Survey 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://arxiv.org/abs/1408.2927">http://arxiv.org/abs/1408.2927</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xdEF5JTh"/></comment> </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Indyk</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Motwani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Approximate nearest neighbors: towards removing the curse of dimensionality</article-title>
          <conf-name>Proceedings of the Thirtieth Annual ACM Symposium on Theory of computing</conf-name>
          <conf-date>May 24-26, 1998</conf-date>
          <conf-loc>Dallas, Texas</conf-loc>
          <pub-id pub-id-type="doi">10.1145/276698.276876</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Broder</surname>
            <given-names>AZ</given-names>
          </name>
          <name name-style="western">
            <surname>Charikar</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Frieze</surname>
            <given-names>AM</given-names>
          </name>
          <name name-style="western">
            <surname>Mitzenmacher</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Min-Wise Independent Permutations</article-title>
        <source>J Comput Syst Sci</source>  
        <year>2000</year>  
        <month>06</month>  
        <volume>60</volume>  
        <issue>3</issue>  
        <fpage>630</fpage>  
        <lpage>659</lpage>  
        <pub-id pub-id-type="doi">10.1006/jcss.1999.1690</pub-id></nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>SF</given-names>
          </name>
          <name name-style="western">
            <surname>Kumar</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Hashing with graphs</article-title>
        <conf-name>Proceedings of the 28th International Conference on Machine Learning</conf-name>
        <conf-date>June 28, 2011</conf-date>
        <conf-loc>Bellevue, Washington</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.icml-2011.org/papers/6_icmlpaper.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lazebnik</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Iterative quantization: a procrustean approach to learning binary codes</article-title>
          <conf-name>IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>
          <conf-date>June 20-25, 2011</conf-date>
          <conf-loc>Colorado Springs, CO</conf-loc>
          <pub-id pub-id-type="doi">10.1109/CVPR.2011.5995432</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kong</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>WJ</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Bartlett</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Weinberger</surname>
            <given-names>KQ</given-names>
          </name>
          <name name-style="western">
            <surname>Burges</surname>
            <given-names>CJ</given-names>
          </name>
          <name name-style="western">
            <surname>Bottou</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Pereira</surname>
            <given-names>FC</given-names>
          </name>
        </person-group>
        <article-title>Isotropic hashing</article-title>
        <source>Advances in Neural Information Processing Systems 25</source>  
        <year>2012</year>  
        <publisher-loc>Red Hook, NY, USA</publisher-loc>
        <publisher-name>Curran Associates, Inc</publisher-name></nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gong</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Kumar</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Verma</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Lazebnik</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Angular quantization-based binary codes for fast similarity search</article-title>
        <conf-name>Proceedings of the 25th International Conference on Neural Information Processing Systems (NIPS) - Volume 1</conf-name>
        <conf-date>December 3-6, 2012</conf-date>
        <conf-loc>Lake Tahoe, Nevada</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://paperpile.com/b/4F2MYV/noQb"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>SF</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised hashing for scalable image retrieval</article-title>
          <conf-name>IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2010</conf-name>
          <conf-date>June 13-18, 2010</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/CVPR.2010.5539994</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Kumar</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>SF</given-names>
          </name>
        </person-group>
        <article-title>Semi-supervised hashing for large-scale search</article-title>
        <source>IEEE Trans Pattern Anal Mach Intell</source>  
        <year>2012</year>  
        <month>12</month>  
        <volume>34</volume>  
        <issue>12</issue>  
        <fpage>2393</fpage>  
        <lpage>406</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.ee.columbia.edu/ln/dvmm/publications/12/PAMI_SSHASH.pdf"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1109/TPAMI.2012.48</pub-id>
        <pub-id pub-id-type="medline">22331853</pub-id></nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Fu</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Zha</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Barlow</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Semi-supervised Nonlinear Dimensionality Reduction</article-title>
        <year>2006</year>  
        <conf-name>The 23rd International Conference on Machine Learning</conf-name>
        <conf-date>June 25-29, 2006</conf-date>
        <conf-loc>Pittsburgh, Pennsylvania</conf-loc>
        <pub-id pub-id-type="doi">10.1145/1143844.1143978</pub-id></nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>YG</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>SF</given-names>
            </name>
          </person-group>
          <article-title>Supervised Hashing with Kernels</article-title>
          <conf-name>IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2012</conf-name>
          <conf-date>June 16-21, 2012</conf-date>
          <conf-loc>Providence, RI</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>SF</given-names>
            </name>
          </person-group>
          <article-title>Sequential projection learning for hashing with compact codes</article-title>
          <conf-name>The 27th International Conference on Machine Learning (ICML) 2010</conf-name>
          <conf-date>June 21-24, 2010</conf-date>
          <conf-loc>Haifa, Israel</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bronstein</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Bronstein</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Paragios</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Data fusion through cross-modality metric learning using similarity-sensitive hashing</article-title>
          <conf-name>IEEE Computer Society Conference on Computer Vision and Pattern Recognition</conf-name>
          <conf-date>June 13-18, 2010</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kumar</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Udupa</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Learning hash functions for cross-view similarity search</article-title>
        <conf-name>Proceedings of the 22nd International Joint Conference on Artificial Intelligence (IJCAI) 2011</conf-name>
        <conf-date>July 16-22, 2011</conf-date>
        <conf-loc>Barcelona, Spain</conf-loc>
        <fpage>1360</fpage> </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>HT</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Multiple Feature Hashing for Real-time Large Scale Near-duplicate Video Retrieval</article-title>
          <conf-name>Proceedings of the 19th ACM International Conference on Multimedia</conf-name>
          <conf-date>November 28-December 1, 2011</conf-date>
          <conf-loc>Scottsdale, AZ</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yeung</surname>
              <given-names>DY</given-names>
            </name>
          </person-group>
          <article-title>A probabilistic model for multimodal hash function learning</article-title>
          <conf-name>Proceedings of the 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining - KDD 2012</conf-name>
          <conf-date>August 12-16, 2012</conf-date>
          <conf-loc>Beijing, China</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhen</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Yeung</surname>
            <given-names>DY</given-names>
          </name>
        </person-group>
        <article-title>Co-Regularized Hashing for Multimodal Data</article-title>
        <conf-name>Proceedings of the 25th International Conference on Neural Information Processing Systems 2012 (NIPS 2012)</conf-name>
        <conf-date>December 3-6, 2012</conf-date>
        <conf-loc>Lake Tahoe, Nevada</conf-loc>
        <fpage>1376</fpage>  
        <lpage>1384</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://paperpile.com/b/4F2MYV/6Qrp"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>HT</given-names>
            </name>
          </person-group>
          <article-title>Inter-media hashing for large-scale retrieval from heterogeneous data sources</article-title>
          <conf-name>Proceedings of the 2013 International Conference on Management of Data - SIGMOD '13</conf-name>
          <conf-date>June 22-27, 2013</conf-date>
          <conf-loc>New York, NY, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2463676.2465274</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>HT</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>X</given-names>
          </name>
        </person-group>
        <article-title>Linear cross-modal hashing for efficient multimedia search</article-title>
        <conf-name>Proceedings of the 21st ACM International Conference on Multimedia</conf-name>
        <conf-date>October 21-25, 2013</conf-date>
        <conf-loc>Barcelona, Spain</conf-loc>
        <fpage>143</fpage>  
        <lpage>152</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://paperpile.com/b/4F2MYV/LBef"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1145/2502081.2502107</pub-id></nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Changpinyo</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Sha</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Similarity component analysis</article-title>
        <conf-name>Neural Information Processing Systems 2013 (NIPS 2013)</conf-name>
        <conf-date>December 5-8, 2013</conf-date>
        <conf-loc>Lake Tahoe, Nevada</conf-loc>
        <fpage>1511</fpage>  
        <lpage>1519</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www-scf.usc.edu/~kuanl/papers/nips13_sca.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Horn</surname>
            <given-names>RA</given-names>
          </name>
          <name name-style="western">
            <surname>Johnson</surname>
            <given-names>CR</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Horn</surname>
            <given-names>RA</given-names>
          </name>
        </person-group>
        <article-title>Norms for vectors and matrices</article-title>
        <source>Matrix Analysis</source>  
        <year>1990</year>  
        <publisher-loc>Cambridge, England</publisher-loc>
        <publisher-name>Cambridge University Press</publisher-name>
        <fpage>313</fpage>  
        <lpage>386</lpage> </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ortega</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Rheinboldt</surname>
            <given-names>WC</given-names>
          </name>
        </person-group>
        <source>Iterative Solution of Nonlinear Equations in Several Variables</source>  
        <year>1970</year>  
        <publisher-loc>Philadelphia</publisher-loc>
        <publisher-name>SIAM</publisher-name></nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zehfuss</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Ueber eine gewisse Determinante</article-title>
        <source>Z Math Phys</source>
        <year>1858</year>  
        <volume>3</volume>  
        <fpage>298</fpage>  
        <lpage>301</lpage> </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Goldberger</surname>
            <given-names>AL</given-names>
          </name>
          <name name-style="western">
            <surname>Amaral</surname>
            <given-names>LA</given-names>
          </name>
          <name name-style="western">
            <surname>Glass</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Hausdorff</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Ivanov</surname>
            <given-names>PC</given-names>
          </name>
          <name name-style="western">
            <surname>Mark</surname>
            <given-names>RG</given-names>
          </name>
          <name name-style="western">
            <surname>Mietus</surname>
            <given-names>JE</given-names>
          </name>
          <name name-style="western">
            <surname>Moody</surname>
            <given-names>GB</given-names>
          </name>
          <name name-style="western">
            <surname>Peng</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Stanley</surname>
            <given-names>HE</given-names>
          </name>
        </person-group>
        <article-title>PhysioBank, PhysioToolkit, and PhysioNet: components of a new research resource for complex physiologic signals</article-title>
        <source>Circulation</source>  
        <year>2000</year>  
        <month>06</month>  
        <day>13</day>  
        <volume>101</volume>  
        <issue>23</issue>  
        <fpage>e215</fpage>  
        <lpage>e220</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://paperpile.com/b/4F2MYV/MWQ1"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1161/01.CIR.101.23.e215</pub-id></nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Han</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>WY</given-names>
          </name>
          <name name-style="western">
            <surname>Mao</surname>
            <given-names>BH</given-names>
          </name>
        </person-group>
        <article-title>Borderline-SMOTE: A New Over-Sampling Method in Imbalanced Data Sets Learning</article-title>
        <conf-name>Proceedings of the 2005 International Conference on Advances in Intelligent Computing (ICIC)</conf-name>
        <conf-date>August 23-26, 2005</conf-date>
        <conf-loc>Hefei, China</conf-loc>
        <fpage>878</fpage>  
        <lpage>887</lpage> </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Shi</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Dai</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Tang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Ohno-Machado</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Secure Multi-pArty Computation Grid LOgistic REgression (SMAC-GLORE)</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2016</year>  
        <month>07</month>
        <day>25</day>  
        <volume>16 Suppl 3</volume>  
        <fpage>89</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-016-0316-1"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s12911-016-0316-1</pub-id>
        <pub-id pub-id-type="medline">27454168</pub-id>
        <pub-id pub-id-type="pii">10.1186/s12911-016-0316-1</pub-id>
        <pub-id pub-id-type="pmcid">PMC4959358</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
