<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i1e28842</article-id>
      <article-id pub-id-type="pmid">35049514</article-id>
      <article-id pub-id-type="doi">10.2196/28842</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Patient Representation Learning From Heterogeneous Data Sources and Knowledge Graphs Using Deep Collective Matrix Factorization: Evaluation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Elbattah</surname>
            <given-names>Mahmoud</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Kumar</surname>
            <given-names>Sajit</given-names>
          </name>
          <degrees>MBA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1331-5277</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Nanelia</surname>
            <given-names>Alicia</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3140-5575</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Mariappan</surname>
            <given-names>Ragunathan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7242-0966</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Rajagopal</surname>
            <given-names>Adithya</given-names>
          </name>
          <degrees>BTech</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0819-874X</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Rajan</surname>
            <given-names>Vaibhav</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Information Systems and Analytics</institution>
            <institution>National University of Singapore</institution>
            <addr-line>13 Computing Drive</addr-line>
            <addr-line>Singapore, 117417</addr-line>
            <country>Singapore</country>
            <phone>65 65166737</phone>
            <email>vaibhav.rajan@nus.edu.sg</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6748-6864</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Great Learning</institution>
        <addr-line>Bengaluru</addr-line>
        <country>India</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Information Systems and Analytics</institution>
        <institution>National University of Singapore</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>National Institute of Technology</institution>
        <addr-line>Tiruchirappalli</addr-line>
        <country>India</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Vaibhav Rajan <email>vaibhav.rajan@nus.edu.sg</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>1</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>1</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>1</issue>
      <elocation-id>e28842</elocation-id>
      <history>
        <date date-type="received">
          <day>15</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>3</day>
          <month>10</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>7</day>
          <month>11</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>14</day>
          <month>11</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Sajit Kumar, Alicia Nanelia, Ragunathan Mariappan, Adithya Rajagopal, Vaibhav Rajan. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 20.01.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/1/e28842" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Patient representation learning aims to learn features, also called representations, from input sources automatically, often in an unsupervised manner, for use in predictive models. This obviates the need for cumbersome, time- and resource-intensive manual feature engineering, especially from unstructured data such as text, images, or graphs. Most previous techniques have used neural network–based autoencoders to learn patient representations, primarily from clinical notes in electronic medical records (EMRs). Knowledge graphs (KGs), with clinical entities as nodes and their relations as edges, can be extracted automatically from biomedical literature and provide complementary information to EMR data that have been found to provide valuable predictive signals.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to evaluate the efficacy of collective matrix factorization (CMF), both the classical variant and a recent neural architecture called deep CMF (DCMF), in integrating heterogeneous data sources from EMR and KG to obtain patient representations for clinical decision support tasks.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Using a recent formulation for obtaining graph representations through matrix factorization within the context of CMF, we infused auxiliary information during patient representation learning. We also extended the DCMF architecture to create a task-specific end-to-end model that learns to simultaneously find effective patient representations and predictions. We compared the efficacy of such a model to that of first learning unsupervised representations and then independently learning a predictive model. We evaluated patient representation learning using CMF-based methods and autoencoders for 2 clinical decision support tasks on a large EMR data set.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our experiments show that DCMF provides a seamless way for integrating multiple sources of data to obtain patient representations, both in unsupervised and supervised settings. Its performance in single-source settings is comparable with that of previous autoencoder-based representation learning methods. When DCMF is used to obtain representations from a combination of EMR and KG, where most previous autoencoder-based methods cannot be used directly, its performance is superior to that of previous nonneural methods for CMF. Infusing information from KGs into patient representations using DCMF was found to improve downstream predictive performance.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our experiments indicate that DCMF is a versatile model that can be used to obtain representations from single and multiple data sources and combine information from EMR data and KGs. Furthermore, DCMF can be used to learn representations in both supervised and unsupervised settings. Thus, DCMF offers an effective way of integrating heterogeneous data sources and infusing auxiliary knowledge into patient representations.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>representation learning</kwd>
        <kwd>deep collective matrix factorization</kwd>
        <kwd>electronic medical records</kwd>
        <kwd>knowledge graphs</kwd>
        <kwd>multiview learning</kwd>
        <kwd>graph embeddings</kwd>
        <kwd>clinical decision support</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Machine learning–based predictive models have been found to be highly accurate in many clinical decision support tasks. Examples include predictions of unforeseen complications [<xref ref-type="bibr" rid="ref1">1</xref>], patient severity assessment through mortality predictors [<xref ref-type="bibr" rid="ref2">2</xref>] and automated coding for billing [<xref ref-type="bibr" rid="ref3">3</xref>], and prediction of patient outcomes [<xref ref-type="bibr" rid="ref4">4</xref>], to name a few. The key ingredients of these models are the features used to describe patients for whom predictions are required. The traditional approach for building these features is to handcraft them typically in collaboration with a domain expert. However, with the growing amount, complexity, and diversity of clinical information sources, such manual feature engineering is practically infeasible. For instance, in electronic medical records (EMRs), patient information may be distributed among laboratory tests, nursing notes, radiology images and reports, genomic data, and other data sources.</p>
        <p>Representation learning aims to learn features or representations from the given input sources automatically, often in an unsupervised manner. This obviates the need for manual feature engineering and is particularly useful with unstructured data sources such as clinical notes. These real-valued vectorial representations can be used as features directly in machine learning models for various downstream tasks such as prediction or cluster detection. Such representation learning has been found to be effective in several predictive models, for example, disease category prediction [<xref ref-type="bibr" rid="ref5">5</xref>] and mortality prediction [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>Previous studies have primarily used clinical notes to learn patient representations. Clinical notes are a rich source of information containing detailed subjective and objective evaluations of patient conditions during the hospital stay. Some previous studies have also combined other structured tables from EMR with features extracted from notes to obtain patient representations [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref5">5</xref>] or to mine clinical information such as drug mentions [<xref ref-type="bibr" rid="ref7">7</xref>]. Many of these studies have used variants of deep neural architecture based on autoencoders to obtain unsupervised patient representations.</p>
        <p>When information from multiple heterogeneous sources is available, predictive models benefit from latent representations that systematically model correlated shared structures. The aim of multi-view learning is to effectively build such latent representations, where views refer to measurements for the same subjects that differ in source, datatype, or modality; heterogeneous data sources within EMR provide such multiple views of patients. A general technique for multi-view representation learning from arbitrary collections of heterogeneous data sources is collective matrix factorization (CMF) [<xref ref-type="bibr" rid="ref8">8</xref>]. CMF can be used to obtain patient representations from multi-view EMR data and can also be used to seamlessly integrate auxiliary information from external sources.</p>
        <p>One such auxiliary source of information is a clinical knowledge graph (KG) that has been found to be valuable for improving both the accuracy and interpretability of predictive models. These KGs have clinical entities (eg, diseases, drugs, and biomolecules) as nodes and different kinds of relations (eg, treats, predisposes, and causes) as edges. They can be automatically created from various sources such as biomedical literature and web-based health portals. Representation learning methods have also been developed for graph inputs that can automatically learn vectorial representations of nodes to incorporate the global structural and semantic properties of the graph. These node representations can then be used in machine learning models for graph analytics such as community detection or node classification. Owing to its wide applicability, a large number of graph representation learning techniques have been developed for various classes of graphs, including KGs.</p>
        <p>In this paper, we analyze patient representation learning in light of 2 recent advances in CMF and KG representation learning. A deep autoencoder-based architecture, called deep CMF (DCMF), was developed for CMF, which was found to outperform classical nonneural variants of CMF in several tasks [<xref ref-type="bibr" rid="ref9">9</xref>]. Using DCMF, which provides a seamless way of integrating heterogeneous data, we evaluate the effectiveness of patient representations when the input data are augmented with additional information from literature-derived KGs. The generality of DCMF allows many different ways of using KG as inputs; however, not all of them are equally effective. Recently, it has been shown that many graph representation learning methods can be reformulated as a matrix factorization problem. Leveraging this formulation within the context of CMF and DCMF, we infuse auxiliary information during patient representation learning. To our knowledge, this is the first study to use this technique to obtain clinical KG representations and use it within the DCMF framework to obtain patient representations.</p>
        <p>Furthermore, the DCMF architecture can easily be extended to create a task-specific end-to-end model that learns to simultaneously find effective patient representations and predictions. We also compare the efficacy of such a model to that of a 2-stage process of first learning unsupervised representations and then independently learning a predictive model.</p>
        <p>We rigorously evaluate patient representation learning using DCMF-based methods and autoencoders for 2 clinical decision support tasks on EMR data comprising 28,563 patient episodes. The first task is that of primary diagnosis category prediction, which is performed during coding from discharge summaries when a patient is discharged from the hospital for billing and reimbursement purposes. The second task is that of mortality (risk of death) prediction, which can be used to identify high-risk patients and prioritize their care.</p>
        <p>The utility of DCMF-based patient representations, obtained from only EMR data and a combination of KGs and EMR data in these 2 tasks, is empirically analyzed and discussed.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <sec>
          <title>Representation Learning</title>
          <p>Statistical machine learning models typically assume inputs as feature vectors. To obviate the need for cumbersome, time- and resource-intensive manual feature engineering, especially from unstructured data such as text, images, or graphs, representation learning aims to learn features or representations from the input directly, often in an unsupervised manner. These real-valued vectorial representations can be used as features directly in machine learning models for various downstream tasks such as prediction or cluster detection.</p>
          <p>Representation learning has been successfully used in many domains, such as natural language processing (NLP) [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], multimodal learning [<xref ref-type="bibr" rid="ref12">12</xref>], social network analysis [<xref ref-type="bibr" rid="ref13">13</xref>], and bioinformatics [<xref ref-type="bibr" rid="ref14">14</xref>]. In addition, representation learning has been applied within medical informatics to learn patient representations from clinical notes [<xref ref-type="bibr" rid="ref6">6</xref>], EMR data [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], clinical time series [<xref ref-type="bibr" rid="ref15">15</xref>], and clinical KGs [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>].</p>
          <p>Autoencoder-based neural architectures have been used in most methods to learn patient representations. Miotto et al [<xref ref-type="bibr" rid="ref5">5</xref>] used stacked denoising autoencoders (SDAE) to learn patient representations from both structured EMR data and topics extracted from clinical notes. Dubois et al [<xref ref-type="bibr" rid="ref18">18</xref>] obtained note-level representations from clinical notes and combined them to form patient representations. Suresh et al [<xref ref-type="bibr" rid="ref19">19</xref>] evaluated different autoencoder architectures to find patient phenotypes. Sushil et al [<xref ref-type="bibr" rid="ref6">6</xref>] evaluated SDAE and Doc2vec representations, both independently and together, to obtain patient representations from clinical notes.</p>
          <p>An autoencoder is a simple feedforward neural network that learns to reconstruct its input; it does so by first encoding the input into a dense, low-dimensional vector, also called bottleneck (which is used as the representation after training), and then decoding the bottleneck into the output. The network is trained to make the output as close as possible to the input. Both the encoder and decoder are implemented using neural networks. When there are multiple sources of patient information, such as demographic data, laboratories, and medications, they can be concatenated and provided as input to an autoencoder. A denoising autoencoder uses corrupted versions of inputs and is trained to reconstruct the uncorrupted version. SDAE is a variant based on stacking layers of denoising autoencoders, which are trained locally to denoise corrupted versions of their inputs [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
          <p>In a different approach for combining multiple data sources, patient representations based on CMF were used in the study by Huddar et al [<xref ref-type="bibr" rid="ref1">1</xref>] to combine multiple EMR matrices with features extracted from clinical notes. These representations were found to be effective in predicting postoperative acute respiratory failure in intensive care unit (ICU) patients.</p>
        </sec>
        <sec>
          <title>DCMF Architecture</title>
          <p>In multi-view learning, views refer to measurements for the same subjects that differ in source, datatype, or modality. CMF is a general technique for learning shared representations from arbitrary collections of heterogeneous data sources [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
          <p>For a single matrix X<sub>m×n</sub> containing m rows and n columns, low-rank factorization aims to obtain latent factors U<sub>m×k</sub> and V<sub>n×k</sub> such that X≈UV<sup>T</sup>, where the latent dimension k&#60;min(m,n). The latent factors can be viewed as low-dimensional representations of the row and column entities. For example, if X is a matrix containing diagnoses of m patients, where each patient can have n≥1 diagnoses, the factors provide k-dimensional representations of patients (in U) and diseases (in V). The factors are typically learned by solving the optimization problem: <inline-graphic xlink:href="medinform_v10i1e28842_fig8.png" xlink:type="simple" mimetype="image"/>, where <italic>l</italic> denotes a loss function.</p>
          <p>CMF generalizes this idea of single matrix factorization for an arbitrary collection of matrices. The input to the CMF is a collection of matrices, where each matrix, representing a view, has a relationship between 2 entity types along each matrix dimension, and entity types may be involved in multiple views. CMF collectively factorizes the input set of matrices to learn a low-rank latent representation for each entity type from all the views in which the entity type is present. As the CMF models arbitrary collections of matrices, this setting is also referred to as <italic>augmented multi-view learning</italic>.</p>
          <p>A model for CMF based on deep learning was developed by Mariappan and Rajan [<xref ref-type="bibr" rid="ref9">9</xref>], which is briefly described next. Given M matrices (indexed by m) that describe the relationships between E entities (indexed by e), each with dimension d<sub>e</sub>, DCMF jointly obtains latent representations of each entity U<sub>e</sub> and low-rank factorizations of each matrix <inline-graphic xlink:href="medinform_v10i1e28842_fig9.png" xlink:type="simple" mimetype="image"/> such that U<sub>e</sub>=f<sub>θ</sub> ([C]<sup>(e)</sup>), where f<sub>θ</sub> is an entity-specific nonlinear transformation, obtained through a neural network–based encoder with weights θ, and [C]<sup>(e)</sup> denotes all matrices in the collection that contain a relationship of entity e. The entities corresponding to the rows and columns of the m<sup>th</sup> matrix are denoted by indices r<sub>m</sub> and c<sub>m</sub>, respectively.</p>
          <p>There are 2 steps in DCMF model construction:</p>
          <list list-type="order">
            <list-item>
              <p>Input transformation: For each entity e, we create a new matrix C<sup>(e)</sup>, which we call a concatenated matrix, by concatenating all the matrices containing entity e.</p>
            </list-item>
            <list-item>
              <p>Network construction: We then use E (dependent) autoencoders to obtain the latent factors U<sub>e</sub> from the concatenated matrices C<sup>(e)</sup>. For each entity e, our network has an autoencoder whose input is C<sup>(e)</sup>, and the decoding is represented by C<sup>(e)’</sup>. The bottleneck or encoding of each autoencoder, after training, forms the latent factor U<sub>e</sub>.</p>
            </list-item>
          </list>
          <p>The latent factors are learned by training all the autoencoders together by solving the following equation:</p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i1e28842_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where l<sub>E</sub> is the reconstruction loss between the autoencoder’s input C<sup>(e)</sup> and the decoding C<sup>(e)’</sup>; l<sub>R</sub> is the matrix reconstruction loss, where the reconstructed matrix <inline-graphic xlink:href="medinform_v10i1e28842_fig11.png" xlink:type="simple" mimetype="image"/> of the m<sup>th</sup> view is obtained by multiplying the associated row and column entity representations <inline-graphic xlink:href="medinform_v10i1e28842_fig12.png" xlink:type="simple" mimetype="image"/> and <inline-graphic xlink:href="medinform_v10i1e28842_fig13.png" xlink:type="simple" mimetype="image"/>. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows a schematic of the model construction steps for an example comprising 5 matrices.</p>
          <p>Collective training of all autoencoders induces dependencies between the autoencoder networks, which may result in simultaneous underfitting in some networks and overfitting in other networks. This makes collective learning of all latent representations challenging and, to scale to arbitrary collections of matrices, necessitates automatic hyperparameter selection. We address these optimization challenges through multitask Bayesian optimization (details can be found in the study by Mariappan and Rajan [<xref ref-type="bibr" rid="ref9">9</xref>]).</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Schematic of supervised deep collective matrix factorization architecture for an example input of 5 matrices, 6 entities. Top: input matrices and a graph showing the entities present in each matrix. Bottom: for each entity, matrices containing that entity (as row or column) are concatenated (shaded) and then given as input to the autoencoder. All autoencoders are trained collectively.</p>
            </caption>
            <graphic xlink:href="medinform_v10i1e28842_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Graph Embeddings</title>
          <p>Representation learning from graphs aims to learn low-dimensional real-valued features of its nodes, also called graph embeddings, to capture the global structural information and semantic properties in the graph. Many representation learning methods have been proposed for homogeneous graphs, where nodes and edges are both of a single type, for example, DeepWalk [<xref ref-type="bibr" rid="ref21">21</xref>] and Node2Vec [<xref ref-type="bibr" rid="ref22">22</xref>]. Many real-world interactions, including those found in clinical KGs, give rise to heterogeneous information networks (HINs) where nodes and edges can be of different types. Representation learning methods for such graphs have also been developed, for example, Metapath2vec [<xref ref-type="bibr" rid="ref23">23</xref>] and Heterogeneous Graph Neural Network [<xref ref-type="bibr" rid="ref24">24</xref>]. Cui et al [<xref ref-type="bibr" rid="ref25">25</xref>] and Cai et al [<xref ref-type="bibr" rid="ref26">26</xref>] described general surveys, Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>] described a survey on HIN embeddings, and Wang et al [<xref ref-type="bibr" rid="ref28">28</xref>] described a survey on representation learning of KGs.</p>
          <p>The key underlying idea of many of these techniques is to learn the similarities or correlations between nodes in the input network and approximate them at the latent level in the embeddings. Many network embedding techniques are equivalent to the factorization of a node similarity matrix with suitable definitions of similarities [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        </sec>
        <sec>
          <title>Knowledge Graphs</title>
          <p>Knowledge bases and ontologies systematically organize the wealth of available biomedical knowledge. For instance, the Unified Medical Language System (UMLS) Metathesaurus [<xref ref-type="bibr" rid="ref30">30</xref>] contains &#62;5 million clinical concepts, identified by controlled unique identifiers (CUIs) and organized into several structured ontologies. Biomedical knowledge is growing at a rapid rate—MEDLINE, the largest index of medical literature, contains &#62;24 million articles with &#62;1.8 million new articles published annually [<xref ref-type="bibr" rid="ref31">31</xref>]. One cannot possibly assimilate all the knowledge, even in a narrow domain that is growing at such a tremendous pace, let alone find novel connections. To facilitate automated knowledge discovery, hypothesis generation, and predictive modeling from such an enormous and rapidly growing source, automated techniques to extract and organize knowledge into KGs have been developed.</p>
          <p>These KGs contain clinical entities as nodes and the relations between entities as edges. As there are different kinds of clinical entities (eg, diseases, drugs, and biomolecules) and different kinds of relations (eg, treats, predisposes, and causes), such KGs are essentially HINs. Examples include Hetionet [<xref ref-type="bibr" rid="ref32">32</xref>], which comprises 47,031 nodes of 11 types and 2,250,197 relationships of 24 types; KnowLife [<xref ref-type="bibr" rid="ref33">33</xref>], which contains &#62;500,000 relations for 13 node types, covering genes, organs, diseases, symptoms, and treatments, as well as environmental and lifestyle risk factors; and Semantic Medline Database (SemMedDB) [<xref ref-type="bibr" rid="ref34">34</xref>], which contains approximately 94 million relations automatically extracted from approximately 27.9 million PubMed abstracts.</p>
          <p>In this study, we used the SemMedDB, which, through the use of NLP techniques, automatically creates a KG from biomedical literature. In SemMedDB, clinical concepts are identified in PubMed abstracts through entity recognition algorithms and then mapped to their CUIs. Various heuristics are used to infer the relations between concepts [<xref ref-type="bibr" rid="ref35">35</xref>]. SemMedDB infers 30 different kinds of relations that are organized into <italic>subject-predicate-object</italic> triplets (eg, drugA–TREATS–diseaseB), where both the subject and object are clinical concepts, and the predicate is a relation. These triplets form an HIN comprising multiple vertex types (clinical concepts) and multiple edge types (predicates).</p>
          <p>Biomedical knowledge, in various forms, including KGs, has been used in clinical predictive models. For instance, the International Classification of Diseases (ICD) hierarchy, which represents relationships across diseases, has been used for diagnosis prediction [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]. Recently, domain knowledge–guided recurrent neural network, a recurrent neural network architecture, was proposed [<xref ref-type="bibr" rid="ref39">39</xref>], where embeddings from a general KG were used internally for initialization. Most of these approaches have specialized architectures for predictive tasks and are not designed to obtain patient representations from heterogeneous collections of data.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Supervised DCMF</title>
        <p>We extended the unsupervised DCMF model to incorporate task-specific supervision. This allowed us to learn entity representations that are influenced by the target variables provided for the predictive task. Furthermore, this creates a predictive model that can seamlessly learn from arbitrary collections of matrices. We assumed that the predictive task, for example, regression or classification, is with respect to one entity only. In the case of clinical tasks, this entity is most often patients. All other data, such as EMRs and KGs, can be used as inputs from which a predictive model for patients can be built. Examples include predicting the length of stay (regression) or the risk of an unforeseen complication (classification).</p>
        <p>The DCMF architecture is extended by adding an additional task-specific layer that takes as input the latent representation of the entity for which labels are provided. This layer is provided with labels during training and is trained along with the rest of the network. Let e<sub>p</sub> be the specific entity (eg, patients) for which task-specific labels y<sub>T</sub> are provided for a task T. Let <inline-graphic xlink:href="medinform_v10i1e28842_fig14.png" xlink:type="simple" mimetype="image"/> be the bottleneck of the autoencoder corresponding to the entity e<sub>p</sub>. The network is constructed as described above with the addition of a single network layer that takes <inline-graphic xlink:href="medinform_v10i1e28842_fig15.png" xlink:type="simple" mimetype="image"/> as input and has an activation layer depending on the task and loss function (eg, sigmoid for classification and linear for regression). There is a task-specific loss l<sub>T</sub>(y<sub>T</sub>,y’) associated with this layer that is also task dependent (eg, cross-entropy for classification and mean-squared error for regression), where y’ denotes the network’s predictions. The supervised latent representations are now learned by solving the following equation:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i1e28842_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Collective training of all autoencoders is performed in exactly the same way as in DCMF but with the new loss function as given above. During prediction, new inputs for entity e<sub>p</sub> may be given along with all other auxiliary data, and the additional layer’s outputs can be used as predictions.</p>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> shows a schematic of the model. There are 5 input matrices containing pairwise relations across 6 entities. The graph at the top shows the associations between entities and matrices. One of the entities (shaded) is associated with the labels for a classification task. The network comprises 6 autoencoders, as shown at the bottom, 1 for each entity. The input to the autoencoders is from the concatenated matrix corresponding to each entity (shown in the input transformation part). The bottleneck layer from the first autoencoder is used as input to a network layer that uses the provided labels during training. Note that this illustration shows a specific example of 5 matrices; however, the DCMF model can be used with any collection of input matrices.</p>
      </sec>
      <sec>
        <title>Combined Data-Driven and Knowledge-Based Representation Learning Using DCMF</title>
        <p>Any graph may be represented by its adjacency matrix. However, factorization of this adjacency matrix may not yield effective representations. We also observed this empirically in our experiments. Another way of using KGs is to first obtain graph embeddings and then use the embeddings within the CMF. We experimented with TransE [<xref ref-type="bibr" rid="ref40">40</xref>] and found that this did not yield effective representations. To obtain good representations, we used the technique previously proposed by Liu et al [<xref ref-type="bibr" rid="ref29">29</xref>]. The key idea was to compute the similarities between the nodes in the graphs and obtain representations by factorizing the similarity matrices.</p>
        <p>The global resource allocation (GRA) similarity, between 2 nodes in a graph, was proposed by Liu et al [<xref ref-type="bibr" rid="ref29">29</xref>] with the aim of having similar embeddings for similar nodes and generalizing previous metrics. We found similarities between diseases, medications, and procedures (separately) from the SemMedDB KG using the GRA similarity. These similarity matrices are provided as input to CMF-based methods that internally factorize all the matrices collectively, as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Schematic of combined data-driven knowledge-based representation learning. Pairwise Global Resource Allocation similarities among clinical entities are computed from the knowledge graph. Patient representations are learnt from these similarity matrices and the input electronic health record data collectively using Collective Matrix Factorization-based methods. CMF: Collective Matrix Factorization; EHR: electronic health record.</p>
          </caption>
          <graphic xlink:href="medinform_v10i1e28842_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>We now provide an intuitive explanation of GRA similarity and explain why it is a good measure for clinical KGs; a more technical description can be found in the study by Liu et al [<xref ref-type="bibr" rid="ref29">29</xref>]. The similarity between 2 nodes <italic>i</italic> and <italic>j</italic> is computed based on the paths that exist between them. Such a global measure can be applied to any 2 nodes in the graph, irrespective of their distance within the graph. In contrast, local measures, such as the number of common neighbors, often yield ineffective embeddings as many node pairs may have the same scores. This is particularly true for dense clinical KGs.</p>
        <p>The similarity score depends on (1) the number of paths, (2) the length of the paths, and (3) the node degrees of the intermediate nodes in each path. For each path between i and j, its contribution is equal to the reciprocal of the product of the degrees of the intermediate nodes of the path. Let p<sup>l</sup>(i,j) be a path of length l between nodes i and j, and let the intermediate nodes be i<sub>1</sub>,i<sub>2</sub>,...,i<sub>l–2</sub>. Let k(i) denote the degree of node i, that is, the number of edges incoming to or outgoing from <italic>i</italic>. The contribution of a path c(p<sup>l</sup>) is defined as follows:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i1e28842_fig17.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>In this manner, paths that contain high-degree nodes have higher denominators, and their contributions are decreased. This is justified as high-degree nodes connect many different nodes and thus affect many paths. Therefore, paths that do not contain such high-degree nodes should contribute to the higher similarity between the nodes. The final GRA similarity is the sum of the contributions over all paths weighted by a factor that decays exponentially with path length:</p>
        <disp-formula>
          <graphic xlink:href="medinform_v10i1e28842_fig18.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>By exponentially decaying the weights, shorter paths are assigned higher weights. Thus, both the number and length of the paths are accounted for in the similarity measure.</p>
        <p>Liu et al [<xref ref-type="bibr" rid="ref29">29</xref>] showed that this technique generalizes and outperforms many previous graph embedding methods. To our knowledge, ours is the first study to use this technique to obtain clinical KG representations and use it within a collective matrix factorization setting to obtain patient representations.</p>
      </sec>
      <sec>
        <title>Experiment Settings</title>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows a schematic of the experimental settings. We considered 3 views: 1, 2, and 3. View 1 comprises data extracted from clinical notes that have been used for patient representation learning in several previous studies. In view 2, data from SemMedDB KGs were extracted as described above and added to the data from view 1. In view 3, structured data from the EMR were also added to obtain patient representations. In the following section, we evaluate the performance of representations learned from these 3 views in 2 clinical decision support tasks.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Views 1, 2, and 3 used to obtain patient representations. EMR: electronic medical record; SemMedDB: Semantic Medline Database.</p>
          </caption>
          <graphic xlink:href="medinform_v10i1e28842_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data</title>
        <sec>
          <title>Overview</title>
          <p>We used the Medical Information Mart for Intensive Care (MIMIC) III database [<xref ref-type="bibr" rid="ref41">41</xref>], which contains clinical data of &#62;40,000 patients admitted to the ICUs in the Beth Israel Deaconess Medical Center in Boston, Massachusetts, between 2001 and 2012. The data were extracted and deidentified in compliance with the Health Insurance Portability and Accountability Act standards [<xref ref-type="bibr" rid="ref41">41</xref>]. We excluded patients with &#62;1 hospital stay in MIMIC-III. Patients aged &#60;18 years were also excluded. A total of 28,563 patient episodes were used.</p>
        </sec>
        <sec>
          <title>Clinical Notes Preprocessing</title>
          <p>The NOTEEVENTS table in MIMIC-III contains all clinical notes for patients. It contains a column called IS_ERROR. A value of 1 in this column for a note indicates that a physician has identified the note as an error. Using this value, we first excluded notes that were considered erroneous. The CATEGORY column in the table indicates the type of note recorded. Discharge summaries often contain detailed information about the patient’s stay, including diagnoses that are used for billing. As we wanted to predict the diagnosis category automatically from the clinical notes, we excluded all the notes that had been categorized as discharge summaries. The remaining notes were used in our analysis.</p>
          <p>The timestamp of a clinical note is obtained from the CHARTTIME and CHARTDATE columns in the NOTEEVENTS table. They recorded the time and date, respectively, at which the notes were charted. Notes are contained in the TEXT column of the NOTEEVENTS table. To efficiently process the notes, they were aggregated over time intervals of 6 hours, starting from the time of ICU admission, and stored as text files. These text files were provided as input to the cTakes software (Apache) [<xref ref-type="bibr" rid="ref42">42</xref>], which identifies clinical concepts in the input text and provides their CUI values. The software identifies several concept types, such as anatomical site, disease disorder, medication, procedure, and sign–symptoms. We considered only 3 concept types—medication, procedure, and disease–disorder—for our analysis.</p>
          <p>For each of the 3 concept types, we constructed a separate matrix, where each row corresponded to a patient episode and the columns corresponded to CUI for the clinical entity. Note that concepts identified from all the notes of a patient episode were considered together to construct the row in the matrix. The disease matrix is binary, indicating the presence or absence of the CUI in the text. Thus, a 1 in the ij-th cell of the matrix indicates the presence of the j-th CUI in a note of the i-th patient episode. The medication and procedure matrices are count matrices, where each cell indicates the number of times the corresponding CUI is mentioned in the text. The total number of CUIs (ie, columns) in the disease, medication, and procedure matrices was 6604. The matrices were transformed to obtain term frequency-inverse document frequency vectors, where each identified CUI was considered a term, and all the considered notes for each patient episode were considered a document.</p>
        </sec>
        <sec>
          <title>SemMedDB Preprocessing</title>
          <p>SemMedDB contains 30 different kinds of relations that are organized into subject-predicate-object triplets (eg, drugA–TREATS–diseaseB), where both the subject and object are clinical concepts, and the predicate is a relation. The PREDICATION table in SemMedDB contains all the triplets, 1 in each row. The columns SUBJECT_CUI, PREDICATE, and OBJECT_CUI were used to identify the CUI of the subject, predicate, and object, respectively, for each triplet. As described earlier, our aim was to obtain a set of triplets to inform us of pairwise relationships across diseases, medications, and procedures for the patient data obtained from MIMIC-III.</p>
          <p>As the database is very large, we excluded some relations that were not directly related to clinical concepts in the patient data. These predicates included (1) PART_OF, indicating that a physical unit is a part of a larger unit; (2) LOCATION_OF, indicating the site or region of an entity; and (3) PROCESS_OF, indicating the organism in which a process occurs. In addition, all negations of the predicates in SemMedDB, which begin with NEG, were not considered. More details of these ontological predicates can be found in the study by Kilicoglu et al [<xref ref-type="bibr" rid="ref34">34</xref>]. The rows containing these predicates were removed from the table. From the remaining rows, only those rows where both the subject and object CUIs were present in the 6604 CUIs used in the patient data were considered; the other rows were excluded.</p>
          <p>The final set of triplets was used to construct an undirected graph in the following steps. All clinical concepts present as subjects or objects in the triplets were used as nodes. An edge was added to the graph between nodes u and v if there was a predicate with subject u and object v in the considered triplets. Note that there may be multiple triplets between the same subject and object if there are different types of relations. The edges in our graph only indicated the existence of a relation and did not describe the type. Thus, our constructed KG had 6604, 4653, and 3406 nodes of 3 types—disease, medication, and procedure, respectively—and 51,326,066 edges among them. This graph was used to construct GRA similarity matrices, as described earlier for diseases, medications, and procedures.</p>
        </sec>
        <sec>
          <title>Structured EMR Data</title>
          <p>The prescriptions and laboratory events tables from MIMIC for the selected episodes were used directly. UMLS CUIs for medications were fetched by invoking the representational state transfer application programming interface from RxNorm [<xref ref-type="bibr" rid="ref43">43</xref>]. The UMLS CUIs for laboratories were obtained using the MRCONSO file from UMLS [<xref ref-type="bibr" rid="ref30">30</xref>]. Thus, we obtained 1841 and 242 CUIs for medications and laboratories, respectively.</p>
        </sec>
      </sec>
      <sec>
        <title>Evaluation</title>
        <sec>
          <title>Overview</title>
          <p>We evaluated the performance of the models by constructing randomly selected held-out test sets. We split the patient episodes into 90% as training set and 10% as test set. A total of 3 different 90 to 10 splits were randomly generated, and all results shown were averaged over these 3 test sets.</p>
        </sec>
        <sec>
          <title>Clinical Decision Support Tasks</title>
          <p>Predictive performance was evaluated on 2 clinical decision support tasks.</p>
          <p>The first task was that of the primary diagnosis category prediction. When a patient is discharged from the hospital, clinical coders use clinical and demographic data in EMR to assign codes in a standard format, such as ICD, for billing and reimbursement purposes. Several factors such as disease etiology, anatomical site, and severity are used in coding algorithms [<xref ref-type="bibr" rid="ref44">44</xref>]. This is a time-consuming and error-prone process, and mistakes can lead to claim denials and underpayment for hospitals [<xref ref-type="bibr" rid="ref45">45</xref>]. As a result, many methods have been developed for automated ICD coding [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. An important code, from a billing perspective, that needs to be ascertained is the primary diagnosis (the reason for hospitalization). Following the study by Sushil et al [<xref ref-type="bibr" rid="ref6">6</xref>], we predicted the category of primary diagnosis, where the categories were grouped into 18 generic categories that corresponded to diagnosis-related groups [<xref ref-type="bibr" rid="ref48">48</xref>]. We modeled this as a multilabel classification task.</p>
          <p>Our second task was that of mortality (risk of death) prediction. At the individual patient level, such models can be used to identify high-risk patients and prioritize their care within the ICU. It can also aid in critical decisions such as interrupting treatments or providing do-not-resuscitate orders [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. MIMIC-III provides 3 different mortality labels: in-hospital, 1-month, and 1-year mortality. We used 1-year mortality, which had the least class imbalance. The label indicates whether a patient died within 1 year of discharge from the hospital. Thus, this was a binary classification task.</p>
          <p>The label distributions for both the data sets are shown in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Label distribution for 1-year mortality prediction task.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="100"/>
              <col width="400"/>
              <col width="500"/>
              <thead>
                <tr valign="top">
                  <td>Label</td>
                  <td>Meaning</td>
                  <td>Episodes, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>0</td>
                  <td>Not expired within 1 year after discharge</td>
                  <td>25,071 (87.79)</td>
                </tr>
                <tr valign="top">
                  <td>1</td>
                  <td>Expired within 1 year after discharge</td>
                  <td>3487 (12.21)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Label distribution for diagnosis category prediction task.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="70"/>
              <col width="630"/>
              <col width="300"/>
              <thead>
                <tr valign="top">
                  <td>Label</td>
                  <td>Meaning</td>
                  <td>Episodes, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>0</td>
                  <td>Infection and parasitic diseases</td>
                  <td>2067 (7.24)</td>
                </tr>
                <tr valign="top">
                  <td>1</td>
                  <td>Neoplasms</td>
                  <td>2202 (7.71)</td>
                </tr>
                <tr valign="top">
                  <td>2</td>
                  <td>Endocrine, nutritional, and metabolic diseases and immunity disorders</td>
                  <td>616 (2.16)</td>
                </tr>
                <tr valign="top">
                  <td>3</td>
                  <td>Diseases of blood and blood-forming organs</td>
                  <td>96 (0.34)</td>
                </tr>
                <tr valign="top">
                  <td>4</td>
                  <td>Mental disorders</td>
                  <td>273 (0.96)</td>
                </tr>
                <tr valign="top">
                  <td>5</td>
                  <td>Diseases of nervous system and sense organs</td>
                  <td>487 (1.71)</td>
                </tr>
                <tr valign="top">
                  <td>6</td>
                  <td>Diseases of the circulatory system</td>
                  <td>11,249 (39.39)</td>
                </tr>
                <tr valign="top">
                  <td>7</td>
                  <td>Diseases of the respiratory system</td>
                  <td>2031 (7.11)</td>
                </tr>
                <tr valign="top">
                  <td>8</td>
                  <td>Diseases of the digestive system</td>
                  <td>2614 (9.15)</td>
                </tr>
                <tr valign="top">
                  <td>9</td>
                  <td>Diseases of the genitourinary system</td>
                  <td>505 (1.77)</td>
                </tr>
                <tr valign="top">
                  <td>10</td>
                  <td>Complications of pregnancy, childbirth, and the puerperium</td>
                  <td>119 (0.42)</td>
                </tr>
                <tr valign="top">
                  <td>11</td>
                  <td>Diseases of the skin and subcutaneous tissue</td>
                  <td>75 (0.26)</td>
                </tr>
                <tr valign="top">
                  <td>12</td>
                  <td>Diseases of the musculoskeletal system and connective tissue</td>
                  <td>372 (1.3)</td>
                </tr>
                <tr valign="top">
                  <td>13</td>
                  <td>Congenital anomalies</td>
                  <td>217 (0.76)</td>
                </tr>
                <tr valign="top">
                  <td>14</td>
                  <td>Certain conditions originating in the perinatal period</td>
                  <td>0 (0)</td>
                </tr>
                <tr valign="top">
                  <td>15</td>
                  <td>Symptoms, signs, and ill-defined conditions</td>
                  <td>333 (1.17)</td>
                </tr>
                <tr valign="top">
                  <td>16</td>
                  <td>Injury and poisoning</td>
                  <td>5210 (18.24)</td>
                </tr>
                <tr valign="top">
                  <td>17</td>
                  <td>Supplementary factors influencing health status and contact with health services</td>
                  <td>85 (0.3)</td>
                </tr>
                <tr valign="top">
                  <td>18</td>
                  <td>Supplementary classification of external causes of injury and poisoning</td>
                  <td>7 (0.02)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Models Compared</title>
          <p>We compared 3 models to obtain patient representations. The first was the SDAE that has been used in several previous studies. It was also found to have good performance in representation learning from clinical notes for our selected tasks [<xref ref-type="bibr" rid="ref6">6</xref>]. Note that the SDAE cannot be used when KG matrices are used.</p>
          <p>The other 2 models are the nonneural versions of CMF and DCMF, which can be used in all 3 views. All 3 models were unsupervised learning methods. The representations learned from these methods can be used to train any off-the-shelf classifier. We evaluated the performance using 2 classifiers: random forest [<xref ref-type="bibr" rid="ref50">50</xref>] and logistic regression. We also evaluated DCMF in the extended supervised mode, where no additional classifier was required.</p>
          <p>The SDAE was trained following the implementation of Vincent et al [<xref ref-type="bibr" rid="ref20">20</xref>]. A single hidden layer was used with an embedding dimension of 300, with sigmoid encoding activation and linear decoding activation. The network was trained using the RMSprop optimizer with a batch size of 32, 0.4 dropout [<xref ref-type="bibr" rid="ref51">51</xref>], mean square error loss function, and for 20 epochs. DCMF, both supervised and unsupervised, was trained using a single hidden layer in each entity’s autoencoder, with tanh activation functions. A weight decay of 1e-6 was used with a learning rate of 1e-5. The network was trained using the Adam optimizer [<xref ref-type="bibr" rid="ref52">52</xref>]. The R package for CMF [<xref ref-type="bibr" rid="ref53">53</xref>] was used with default parameters.</p>
        </sec>
        <sec>
          <title>Evaluation Metrics</title>
          <p>Diagnosis category prediction was a multilabel classification task, and we used the standard metrics of accuracy, macro F1, and weighted F1 scores. The F1 score is the harmonic mean of precision and recall. Macro F1 is the unweighted mean of the F1 score for each label. Weighted F1 determines the mean weighted by the number of true instances for each label.</p>
          <p>Mortality prediction was a binary classification task, and we used the F1 score and the area under the receiver operating characteristic curve (AUC) as evaluation metrics. The AUC shows the overall classifier performance at different thresholds that trade off sensitivity for specificity.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>We first present the results of the diagnosis category prediction and then mortality prediction. For each task, we visually present the results in 2 ways: one organized by view and another organized by method. The former allowed us to compare methods within each view, and the latter allowed us to compare views within each method.</p>
      </sec>
      <sec>
        <title>Diagnosis Category Prediction</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the results of the diagnosis category prediction. In view 1, predictions using supervised DCMF yielded &#62;30% improvement in macro-F1 scores compared with classifiers with SDAE-based representations. In views 2 and 3, considerable improvement, ranging from 82% to 1955% in macro-F1 scores, was observed over other methods that separately learned representations and classifiers. In view 1, the accuracy and weighted F1-score of supervised DCMF were comparable with those obtained from classifiers trained on SDAE-based representations. However, with the addition of knowledge matrices in view 3, which can be performed seamlessly, supervised DCMF surpassed their performance.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Results of diagnosis category prediction.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Model and view</td>
                <td>Accuracy (%)</td>
                <td>F1 score-macro (%)</td>
                <td>F1 score-weighted (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>View 1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SDAE<sup>a</sup> LR<sup>b</sup></td>
                <td>68.25</td>
                <td>29.99</td>
                <td>64.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SDAE RF<sup>c</sup></td>
                <td>63.03</td>
                <td>22.74</td>
                <td>57.79</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF<sup>d</sup> LR</td>
                <td>6.66</td>
                <td>0.99</td>
                <td>2.40</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF RF</td>
                <td>43.96</td>
                <td>9.08</td>
                <td>34.57</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF<sup>e</sup> LR</td>
                <td>62.44</td>
                <td>22.59</td>
                <td>58.01</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF RF</td>
                <td>58.44</td>
                <td>17.66</td>
                <td>52.34</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF supervised</td>
                <td>66.86<sup>f</sup></td>
                <td>39.22<sup>f</sup></td>
                <td>65.7<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>View 2</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF LR</td>
                <td>39.95</td>
                <td>3.38</td>
                <td>22.87</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF RF</td>
                <td>41.05</td>
                <td>4.99</td>
                <td>26.83</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF LR</td>
                <td>63.71</td>
                <td>25.34</td>
                <td>59.87</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF RF</td>
                <td>62.48</td>
                <td>22.95</td>
                <td>58.31</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF supervised</td>
                <td>67.96<sup>f</sup></td>
                <td>39.58<sup>f</sup></td>
                <td>66.69<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>View 3</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF LR</td>
                <td>9.39</td>
                <td>2.00</td>
                <td>5.21</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF RF</td>
                <td>44.51</td>
                <td>10.90</td>
                <td>37.44</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF LR</td>
                <td>60.94</td>
                <td>22.56</td>
                <td>56.94</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF RF</td>
                <td>56.17</td>
                <td>17.26</td>
                <td>49.88</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF supervised</td>
                <td>70.87<sup>f</sup></td>
                <td>41.10<sup>f</sup></td>
                <td>69.39<sup>f</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>SDAE: stacked denoising autoencoder.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>RF: random forest.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>CMF: collective matrix factorization.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>DCMF: deep collective matrix factorization.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>Best score for the corresponding view.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref> shows the results of the diagnosis category prediction across the 3 views. In view 1, we observed that neural representations from SDAE and DCMF outperformed nonneural representations from CMF. The supervised DCMF outperformed all other methods. The addition of information from KGs in view 2 improved the performance of DCMF, both unsupervised and supervised, in all 3 metrics. The addition of structured EMR data in view 3 further improved the performance.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Diagnosis category prediction across Views. Top row: accuracy; middle row: macro F1 score; bottom row: weighted F1 score. CMF: collective matrix factorization; DCMF: deep collective matrix factorization; LR: logistic regression; RF: random forest; SDAE: stacked denoising autoencoder.</p>
          </caption>
          <graphic xlink:href="medinform_v10i1e28842_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref rid="figure5" ref-type="fig">Figure 5</xref> shows the same results of diagnosis category prediction as seen in <xref rid="figure4" ref-type="fig">Figure 4</xref> but is organized based on the method. SDAE representations cannot be used in augmented multi-view settings but outperform CMF-based representations even when the CMF uses more data in views 2 and 3. This is likely because of the better representation learning capability of the neural networks. We also see that the DCMF learned better representations from all 3 views. However, although the addition of KG matrices in view 2 improved performance over view 1, further addition of data in view 3 deteriorated performance. Nevertheless, with the addition of supervision from the labels, supervised DCMF was able to learn better with increasing performance across the 3 views.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Diagnosis category prediction across Models. Top row: accuracy; middle row: macro F1 score; bottom row: weighted F1 score. CMF: collective matrix factorization; DCMF: deep collective matrix factorization; LR: logistic regression; RF: random forest; SDAE: stacked denoising autoencoder.</p>
          </caption>
          <graphic xlink:href="medinform_v10i1e28842_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Mortality Prediction</title>
        <p><xref ref-type="table" rid="table4">Table 4</xref> shows the results of mortality prediction. We observed that supervised DCMF outperformed SDAE-based models by &#62;16% in AUC and &#62;13% in macro-F1 in view 1, where data were obtained from clinical notes. In views 2 and 3, where data from KGs and EMRs were cumulatively added to clinical notes, supervised DCMF outperformed all the baselines by similar margins. These results demonstrate the advantage of end-to-end learning using supervised DCMF over other methods that separately learn representations and classifiers.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Results of mortality prediction.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Model and view</td>
                <td>AUC<sup>a</sup> (%)</td>
                <td>F1 score-macro (%)</td>
                <td>F1 score-weighted (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>View 1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SDAE<sup>b</sup> LR<sup>c</sup></td>
                <td>52.06</td>
                <td>53.15</td>
                <td>83.95</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SDAE RF<sup>d</sup></td>
                <td>51.55</td>
                <td>47.77</td>
                <td>82.65</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF<sup>e</sup> LR</td>
                <td>50.37</td>
                <td>48.59</td>
                <td>81.90</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF RF</td>
                <td>50.21</td>
                <td>47.55</td>
                <td>82.44</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF<sup>f</sup> LR</td>
                <td>51.96</td>
                <td>50.88</td>
                <td>83.41</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF RF</td>
                <td>50.31</td>
                <td>47.48</td>
                <td>82.58</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF supervised</td>
                <td>60.44<sup>g</sup></td>
                <td>60.41<sup>g</sup></td>
                <td>83.99<sup>g</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>View 2</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF LR</td>
                <td>50.00</td>
                <td>46.81</td>
                <td>82.40</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF RF</td>
                <td>50.04</td>
                <td>46.91</td>
                <td>82.43</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF LR</td>
                <td>53.48</td>
                <td>53.71</td>
                <td>84.04</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF RF</td>
                <td>51.38</td>
                <td>49.76</td>
                <td>83.12</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF supervised</td>
                <td>60.41<sup>g</sup></td>
                <td>60.25<sup>g</sup></td>
                <td>82.97<sup>g</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>View 3</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF LR</td>
                <td>49.99</td>
                <td>46.81</td>
                <td>82.39</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CMF RF</td>
                <td>50.00</td>
                <td>46.95</td>
                <td>82.37</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF LR</td>
                <td>51.76</td>
                <td>50.57</td>
                <td>83.28</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF RF</td>
                <td>50.08</td>
                <td>47.00</td>
                <td>82.44</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>DCMF supervised</td>
                <td>61.22<sup>g</sup></td>
                <td>62.05<sup>g</sup></td>
                <td>84.43<sup>g</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>SDAE: stacked denoising autoencoder.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>RF: random forest.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>CMF: collective matrix factorization.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>DCMF: deep collective matrix factorization.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>Best score for the corresponding view.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p><xref rid="figure6" ref-type="fig">Figure 6</xref> shows the AUC and F1 scores obtained by the methods across the 3 views. In view 1, the SDAE representations outperform those from CMF. Results with the logistic regression classifier were marginally better than those from the random forest, with SDAE, CMF, and DCMF representations. In view 1, DCMF representations have performance comparable with that of SDAE. Supervised DCMF outperformed all other methods by a large margin. The addition of KG matrices in view 2 improved the performance of the unsupervised DCMF-based classifier. The addition of structured EMR data in view 3 improved the performance of the supervised DCMF.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Mortality prediction across Views. Top row: area under receiver operating characteristic curve; bottom row: F1 score. AUC: area under receiver operating characteristic curve; CMF: collective matrix factorization; DCMF: deep collective matrix factorization; LR: logistic regression; RF: random forest; SDAE: stacked denoising autoencoder.</p>
          </caption>
          <graphic xlink:href="medinform_v10i1e28842_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref rid="figure7" ref-type="fig">Figure 7</xref> shows the same results from <xref rid="figure6" ref-type="fig">Figure 6</xref>, but is organized based on each method. The performances of the unsupervised neural methods SDAE and DCMF are comparable. DCMF can use information from KG matrices to boost its performance. However, the addition of structured EMR data did not increase its performance. In contrast, supervised DCMF is able to use additional data well and achieves the best performance overall with view 3.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Mortality prediction across Models. Top row: area under receiver operating characteristic curve; bottom row: F1 score. AUC: area under receiver operating characteristic curve; CMF: collective matrix factorization; DCMF: deep collective matrix factorization; LR: logistic regression; RF: random forest; SDAE: stacked denoising autoencoder.</p>
          </caption>
          <graphic xlink:href="medinform_v10i1e28842_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our experiments strongly suggest that end-to-end models that are trained in a supervised manner outperform models comprising 2 stages of unsupervised representation learning and an independently learned classifier. An end-to-end neural model also learns patient representations internally; however, these representations are influenced by task-specific labels used for supervision. How these supervised representations perform on tasks other than what they are trained for, that is, whether they are beneficial in transfer learning, remains to be examined. Thus, for a given clinical decision support task, if labels are available, our experiments indicate that an end-to-end model should be preferred.</p>
        <p>DCMF provides a seamless way of integrating multiple sources of data for obtaining patient representations in both unsupervised and supervised settings. As a versatile learning method, it can be used with inputs from a single source (eg, clinical notes) as well as when inputs are from multiple sources (eg, clinical notes and structured EMR tables). Its performance in these settings is comparable with that of previous autoencoder-based representation learning methods. DCMF can also be used to obtain representations in augmented multi-view settings containing arbitrary collections of matrices, where most previous representation learning methods cannot be used directly. In such settings, its performance is considerably superior to that of the previous nonneural methods for CMF. Thus, it provides a framework for infusing valuable information from auxiliary information sources, such as KG, into patient representations.</p>
        <p>Graph embeddings allow us to obtain vectorial representations of nodes in a graph in a way that incorporates the global structural and semantic properties of the graph. Such embeddings can be obtained for KGs as well. The technique for obtaining the embedding can be formulated as a factorization of a similarity matrix where the similarities between nodes are defined based on the number and structural characteristics of the paths between them. With this formulation, the factorization can become part of CMF, which enables us to learn patient representations from multiple clinical data sources as well as KGs. Such patient representations were found to improve downstream predictive performance, especially in supervised settings. Other ways of using KGs within DCMF were not found to be as effective; the 2 alternatives tested were directly using the adjacency matrices of the graphs and first obtaining graph embeddings and then using the embedding matrices within CMF.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our experimental evaluation was conducted on 2 clinical decision support tasks: a binary classification task (mortality prediction) and a multilabel classification task (primary diagnosis category prediction). Furthermore, the evaluation was performed on a subset of data sources (clinical notes, laboratory investigations, and medications) from a single hospital. The trends in performance are expected to remain the same for other tasks (eg, regression tasks) and the addition of other data sources (eg, radiology images) but must be empirically verified.</p>
        <p>The KG used is derived automatically from biomedical literature using NLP techniques. Inaccuracies because of NLP algorithms may lead to false positives (erroneous nodes and edges) and false negatives (incompleteness) in KG. Further investigation into the effects of these inaccuracies in the representations is required. Evaluation of KGs derived from other sources can also be performed. It is possible that the results may improve with decreasing inaccuracies in the KG.</p>
        <p>Very little hyperparameter tuning was performed for the neural models. The results of all neural models are expected to improve with more tuning. The autoencoders used within the DCMF are simple feedforward networks. Other types of autoencoders, such as SDAE or variational autoencoders, may also be used, which may improve the performance of the DCMF.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we investigated the use of DCMF to obtain patient representations for 2 clinical decision support tasks. The key advantage of DCMF is its versatility: it can be used to obtain representations from a single view (eg, clinical notes), from multiple views (eg, notes and structured tables in EMR data), and in <italic>augmented</italic> multi-view settings where it can seamlessly integrate information from diverse sources such as EMR data and KGs. Most previous representation learning methods cannot be used with such augmented multi-view data. Furthermore, DCMF can be easily used to learn representations in both supervised and unsupervised settings. In our experiments, we found that DCMF-based representations lead to predictive accuracy that is comparable with or better than previous techniques. Thus, DCMF offers an effective way of integrating heterogeneous data sources and infusing auxiliary knowledge into patient representations.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CMF</term>
          <def>
            <p>collective matrix factorization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CUI</term>
          <def>
            <p>concept unique identifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">DCMF</term>
          <def>
            <p>deep collective matrix factorization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">GRA</term>
          <def>
            <p>global resource allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">HIN</term>
          <def>
            <p>heterogeneous information network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ICU</term>
          <def>
            <p>intensive care unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">KG</term>
          <def>
            <p>knowledge graph</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MIMIC</term>
          <def>
            <p>Medical Information Mart for Intensive Care</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">SDAE</term>
          <def>
            <p>stacked denoising autoencoder</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">SemMedDB</term>
          <def>
            <p>Semantic Medline Database</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the Singapore Ministry of Education Academic Research Fund (R-253-000-159-114). The principal investigator was VR.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>SK implemented supervised deep collective matrix factorization (DCMF) and scripts to use baseline algorithms. AR and AN implemented the global resource allocation (GRA) similarity. SK, AN, and RM conducted the experiments. VR, SK, and RM wrote the manuscript. VR conceived and supervised the project.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huddar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Desiraju</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Rajan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>CK</given-names>
            </name>
          </person-group>
          <article-title>Predicting complications in critical care using heterogeneous clinical data</article-title>
          <source>IEEE Access</source>
          <year>2016</year>
          <volume>4</volume>
          <fpage>7988</fpage>
          <lpage>8001</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2016.2618775</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghanvatkar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rajan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Deep recurrent neural networks for mortality prediction in intensive care using clinical time series at multiple resolutions</article-title>
          <source>Proceedings of the ICIS Conference 2019</source>
          <year>2019</year>
          <conf-name>ICIS conference 2019</conf-name>
          <conf-date>Dec 15-18, 2019</conf-date>
          <conf-loc>Munich, Germany</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aisel.aisnet.org/icis2019/data_science/data_science/12/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gartner</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kolisch</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Neill</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Padman</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Machine learning approaches for early DRG classification and resource allocation</article-title>
          <source>INFORMS J Comput</source>
          <year>2015</year>
          <month>11</month>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>718</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1287/ijoc.2015.0655"/>
          </comment>
          <pub-id pub-id-type="doi">10.1287/ijoc.2015.0655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arnaud</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Elbattah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gignon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dequen</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning to predict hospitalization at triage: integration of structured data and unstructured text</article-title>
          <source>Proceedings of the IEEE International Conference on Big Data (Big Data)</source>
          <year>2020</year>
          <conf-name>IEEE International Conference on Big Data (Big Data)</conf-name>
          <conf-date>Dec 10-13, 2020</conf-date>
          <conf-loc>Atlanta, GA, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/bigdata50022.2020.9378073</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kidd</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Deep patient: an unsupervised representation to predict the future of patients from the electronic health records</article-title>
          <source>Sci Rep</source>
          <year>2016</year>
          <month>05</month>
          <day>17</day>
          <volume>6</volume>
          <fpage>26094</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/srep26094"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep26094</pub-id>
          <pub-id pub-id-type="medline">27185194</pub-id>
          <pub-id pub-id-type="pii">srep26094</pub-id>
          <pub-id pub-id-type="pmcid">PMC4869115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sushil</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Šuster</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Luyckx</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Daelemans</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Patient representation learning and interpretable evaluation using clinical notes</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>08</month>
          <volume>84</volume>
          <fpage>103</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30126-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.06.016</pub-id>
          <pub-id pub-id-type="medline">29966746</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30126-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Matos</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Oliveira</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>A two-stage workflow to extract and harmonize drug mentions from clinical notes into observational databases</article-title>
          <source>J Biomed Inform</source>
          <year>2021</year>
          <month>08</month>
          <volume>120</volume>
          <fpage>103849</fpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103849</pub-id>
          <pub-id pub-id-type="medline">34214696</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(21)00178-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Relational learning via collective matrix factorization</article-title>
          <source>Proceedings of the 14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2008</year>
          <conf-name>14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>Aug 24 - 27, 2008</conf-date>
          <conf-loc>Las Vegas Nevada USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1401890.1401969</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mariappan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rajan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Deep collective matrix factorization for augmented multi-view learning</article-title>
          <source>Mach Learn</source>
          <year>2019</year>
          <month>5</month>
          <day>17</day>
          <volume>108</volume>
          <issue>8-9</issue>
          <fpage>1395</fpage>
          <lpage>420</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/s10994-019-05801-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10994-019-05801-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of sentences and documents</article-title>
          <source>Proceedings of the 31st International Conference on Machine Learning</source>
          <year>2014</year>
          <conf-name>31st International Conference on Machine Learning</conf-name>
          <conf-date>June 22-24, 2014</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <fpage>1188</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.mlr.press/v32/le14.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Adaptive learning of local semantic and global structure representations for text classification</article-title>
          <source>Proceedings of the 27th International Conference on Computational Linguistics</source>
          <year>2018</year>
          <conf-name>27th International Conference on Computational Linguistics</conf-name>
          <conf-date>Aug 20-26, 2018</conf-date>
          <conf-loc>Santa Fe, New Mexico, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/C18-1173/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pouyanfar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shyu</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Multimodal deep representation learning for video classification</article-title>
          <source>World Wide Web</source>
          <year>2019</year>
          <month>5</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>1325</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/s11280-018-0548-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11280-018-0548-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Online user representation learning across heterogeneous social networks</article-title>
          <source>Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
          <year>2019</year>
          <conf-name>42nd International ACM SIGIR Conference on Research and Development in Information Retrieval</conf-name>
          <conf-date>Jul 21 - 25, 2019</conf-date>
          <conf-loc>Paris France</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3331184.3331258</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liany</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jeyasekharan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rajan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Predicting synthetic lethal interactions using heterogeneous data sources</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>04</month>
          <day>01</day>
          <volume>36</volume>
          <issue>7</issue>
          <fpage>2209</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz893</pub-id>
          <pub-id pub-id-type="medline">31782759</pub-id>
          <pub-id pub-id-type="pii">5646644</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Representation learning for clinical time series prediction tasks in electronic health records</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>12</month>
          <day>17</day>
          <volume>19</volume>
          <issue>Suppl 8</issue>
          <fpage>259</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0985-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0985-7</pub-id>
          <pub-id pub-id-type="medline">31842854</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0985-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6916209</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Widdows</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Embedding of semantic predications</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>04</month>
          <volume>68</volume>
          <fpage>150</fpage>
          <lpage>66</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30052-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.03.003</pub-id>
          <pub-id pub-id-type="medline">28284761</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30052-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5441848</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dasgupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jayagopal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jun Hong</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Mariappan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rajan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Adverse drug event prediction using noisy literature-derived knowledge graphs: algorithm development and validation</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>10</month>
          <day>25</day>
          <volume>9</volume>
          <issue>10</issue>
          <fpage>e32730</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/10/e32730/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/32730</pub-id>
          <pub-id pub-id-type="medline">34694230</pub-id>
          <pub-id pub-id-type="pii">v9i10e32730</pub-id>
          <pub-id pub-id-type="pmcid">PMC8576589</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dubois</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Romano</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Effective representations from clinical notes</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <month>08</month>
          <day>16</day>
          <access-date>2022-01-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1705.07025">https://arxiv.org/abs/1705.07025</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suresh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The use of autoencoders for discovering patient phenotypes</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <month>03</month>
          <day>20</day>
          <access-date>2022-01-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1703.07004">https://arxiv.org/abs/1703.07004</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vincent</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Larochelle</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lajoie</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Manzagol</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Stacked denoising autoencoders: learning useful representations in a deep network with a local denoising criterion</article-title>
          <source>J Mach Learn Res</source>
          <year>2010</year>
          <month>1</month>
          <day>3</day>
          <volume>11</volume>
          <fpage>3371</fpage>
          <lpage>408</lpage>
          <pub-id pub-id-type="doi">10.5555/1756006.1953039</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perozzi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Rfou</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Skiena</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>DeepWalk: online learning of social representations</article-title>
          <source>Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2014</year>
          <conf-name>20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>Aug 24 - 27, 2014</conf-date>
          <conf-loc>New York USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2623330.2623732</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grover</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Leskovec</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>node2vec: scalable feature learning for networks</article-title>
          <source>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2016</year>
          <conf-name>22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>Aug 13 - 17, 2016</conf-date>
          <conf-loc>San Francisco California USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2939672.2939754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Swami</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>metapath2vec: scalable representation learning for heterogeneous networks</article-title>
          <source>Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2017</year>
          <conf-name>23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>Aug 13-17, 2017</conf-date>
          <conf-loc>Halifax NS Canada</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3097983.3098036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Swami</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Heterogeneous graph neural network</article-title>
          <source>Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &#38; Data Mining</source>
          <year>2019</year>
          <conf-name>25th ACM SIGKDD International Conference on Knowledge Discovery &#38; Data Mining</conf-name>
          <conf-date>Aug 4-8, 2019</conf-date>
          <conf-loc>Anchorage AK USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3292500.3330961</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>A survey on network embedding</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2019</year>
          <month>5</month>
          <day>1</day>
          <volume>31</volume>
          <issue>5</issue>
          <fpage>833</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2018.2849727</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>VW</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>KC</given-names>
            </name>
          </person-group>
          <article-title>A comprehensive survey of graph embedding: problems, techniques, and applications</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2018</year>
          <month>9</month>
          <day>1</day>
          <volume>30</volume>
          <issue>9</issue>
          <fpage>1616</fpage>
          <lpage>37</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2018.2807452</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Heterogeneous network representation learning: a unified framework with survey and benchmark</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2020</year>
          <month>12</month>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2020.3045924</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Knowledge graph embedding: a survey of approaches and applications</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2017</year>
          <month>12</month>
          <day>1</day>
          <volume>29</volume>
          <issue>12</issue>
          <fpage>2724</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2017.2754499</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Murata</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kotarasu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A general view for network embedding as matrix factorization</article-title>
          <source>Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining</source>
          <year>2019</year>
          <conf-name>Twelfth ACM International Conference on Web Search and Data Mining</conf-name>
          <conf-date>Feb 11-15, 2019</conf-date>
          <conf-loc>Melbourne VIC Australia</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3289600.3291029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>Database issue</issue>
          <fpage>267</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>Fact sheet MEDLINE®</article-title>
          <source>U.S. National Library of Medicine</source>
          <access-date>2021-12-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://wayback.archive-it.org/org-350/20180312141554/https://www.nlm.nih.gov/pubs/factsheets/medline.html">http://wayback.archive-it.org/org-350/20180312141554/https://www.nlm.nih.gov/pubs/factsheets/medline.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Himmelstein</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Lizee</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hessler</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Brueggeman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Hadley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khankhanian</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Baranzini</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>Systematic integration of biomedical knowledge prioritizes drugs for repurposing</article-title>
          <source>Elife</source>
          <year>2017</year>
          <month>09</month>
          <day>22</day>
          <volume>6</volume>
          <fpage>e26726</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.7554/eLife.26726"/>
          </comment>
          <pub-id pub-id-type="doi">10.7554/eLife.26726</pub-id>
          <pub-id pub-id-type="medline">28936969</pub-id>
          <pub-id pub-id-type="pii">26726</pub-id>
          <pub-id pub-id-type="pmcid">PMC5640425</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ernst</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Siu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weikum</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>KnowLife: a versatile approach for constructing a large knowledge graph for biomedical sciences</article-title>
          <source>BMC Bioinformatics</source>
          <year>2015</year>
          <month>05</month>
          <day>14</day>
          <volume>16</volume>
          <fpage>157</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-015-0549-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12859-015-0549-5</pub-id>
          <pub-id pub-id-type="medline">25971816</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12859-015-0549-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC4448285</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kilicoglu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Fiszman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rosemblat</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rindflesch</surname>
              <given-names>TC</given-names>
            </name>
          </person-group>
          <article-title>SemMedDB: a PubMed-scale repository of biomedical semantic predications</article-title>
          <source>Bioinformatics</source>
          <year>2012</year>
          <month>12</month>
          <day>01</day>
          <volume>28</volume>
          <issue>23</issue>
          <fpage>3158</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/bts591</pub-id>
          <pub-id pub-id-type="medline">23044550</pub-id>
          <pub-id pub-id-type="pii">bts591</pub-id>
          <pub-id pub-id-type="pmcid">PMC3509487</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rindflesch</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Fiszman</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The interaction of domain knowledge and linguistic structure in natural language processing: interpreting hypernymic propositions in biomedical text</article-title>
          <source>J Biomed Inform</source>
          <year>2003</year>
          <month>12</month>
          <volume>36</volume>
          <issue>6</issue>
          <fpage>462</fpage>
          <lpage>77</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532046403001175"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2003.11.003</pub-id>
          <pub-id pub-id-type="medline">14759819</pub-id>
          <pub-id pub-id-type="pii">S1532046403001175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Che</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Deep computational phenotyping</article-title>
          <source>Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2015</year>
          <conf-name>21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>Aug 10-13, 2015</conf-date>
          <conf-loc>Sydney NSW Australia</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2783258.2783365</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>GRAM: Graph-based attention model for healthcare representation learning</article-title>
          <source>Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2017</year>
          <conf-name>23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>Aug 13 - 17, 2017</conf-date>
          <conf-loc>Halifax NS Canada</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3097983.3098126</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chitta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>KAME: Knowledge-based attention model for diagnosis prediction in healthcare</article-title>
          <source>Proceedings of the 27th ACM International Conference on Information and Knowledge Management</source>
          <year>2018</year>
          <conf-name>27th ACM International Conference on Information and Knowledge Management</conf-name>
          <conf-date>Oct 22 - 26, 2018</conf-date>
          <conf-loc>Torino Italy</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3269206.3271701</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Domain knowledge guided deep learning with electronic health records</article-title>
          <source>Proceedings of the IEEE International Conference on Data Mining (ICDM)</source>
          <year>2019</year>
          <conf-name>IEEE International Conference on Data Mining (ICDM)</conf-name>
          <conf-date>Nov 8-11, 2019</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icdm.2019.00084</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bordes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Usunier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia-Duran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yakhnenko</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Translating embeddings for modeling multi-relational data</article-title>
          <source>Proceedings of the 26th International Conference on Neural Information Processing Systems</source>
          <year>2013</year>
          <conf-name>26th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>Dec 5 - 10, 2013</conf-date>
          <conf-loc>Lake Tahoe Nevada</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hal.archives-ouvertes.fr/hal-00920777/document"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <month>05</month>
          <day>24</day>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/sdata.2016.35"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ogren</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>507</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20819853"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
          <pub-id pub-id-type="medline">20819853</pub-id>
          <pub-id pub-id-type="pii">17/5/507</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <article-title>RxNav: browser and application programming interfaces for RxNorm</article-title>
          <source>The National Center for Biomedical Ontology</source>
          <access-date>2021-12-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ncbo.bioontology.org/RxNav">https://ncbo.bioontology.org/RxNav</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Quan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sundararajan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Halfon</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fong</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Burnand</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Luthi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saunders</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Beck</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Feasby</surname>
              <given-names>TE</given-names>
            </name>
            <name name-style="western">
              <surname>Ghali</surname>
              <given-names>WA</given-names>
            </name>
          </person-group>
          <article-title>Coding algorithms for defining comorbidities in ICD-9-CM and ICD-10 administrative data</article-title>
          <source>Med Care</source>
          <year>2005</year>
          <month>11</month>
          <volume>43</volume>
          <issue>11</issue>
          <fpage>1130</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1097/01.mlr.0000182534.19832.83</pub-id>
          <pub-id pub-id-type="medline">16224307</pub-id>
          <pub-id pub-id-type="pii">00005650-200511000-00010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Burroughs</surname>
              <given-names>VJ</given-names>
            </name>
          </person-group>
          <article-title>Addressing medical coding and billing part II: a strategy for achieving compliance. A risk management approach for reducing coding and billing errors</article-title>
          <source>J Natl Med Assoc</source>
          <year>2002</year>
          <month>06</month>
          <volume>94</volume>
          <issue>6</issue>
          <fpage>430</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="medline">12078924</pub-id>
          <pub-id pub-id-type="pmcid">PMC2594405</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Band</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mathur</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Papay</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Khanna</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Cywinski</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Maheshwari</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>EP</given-names>
            </name>
          </person-group>
          <article-title>Multimodal machine learning for automated ICD coding</article-title>
          <source>Proceedings of the 4th Machine Learning for Healthcare Conference</source>
          <year>2019</year>
          <conf-name>4th Machine Learning for Healthcare Conference</conf-name>
          <conf-date>Aug 9-10, 2019</conf-date>
          <conf-loc>Ann Arbor, Michigan</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.mlr.press/v106/xu19a.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>ICD coding from clinical text using multi-filter residual convolutional neural network</article-title>
          <source>Proc Conf AAAI Artif Intell</source>
          <year>2020</year>
          <month>02</month>
          <volume>34</volume>
          <issue>5</issue>
          <fpage>8180</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/34322282"/>
          </comment>
          <pub-id pub-id-type="doi">10.1609/aaai.v34i05.6331</pub-id>
          <pub-id pub-id-type="medline">34322282</pub-id>
          <pub-id pub-id-type="pmcid">PMC8315310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dierdonck</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Hospital resource planning: concepts, feasibility, and framework</article-title>
          <source>Prod Oper Manag</source>
          <year>2009</year>
          <month>3</month>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>2</fpage>
          <lpage>29</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/240271722_Hospital_resource_planning_Concepts_feasibility_and_framework"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1937-5956.1995.tb00038.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bhattacharya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rajan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Shrivastava</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>ICU mortality prediction: a classification algorithm for imbalanced datasets</article-title>
          <source>Proceedings of the AAAI Conference on Artificial Intelligence</source>
          <year>2017</year>
          <conf-name>AAAI Conference on Artificial Intelligence</conf-name>
          <conf-date>February 4-9, 2017</conf-date>
          <conf-loc>San Francisco, California USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ojs.aaai.org/index.php/AAAI/article/view/10721"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Machine Learning</source>
          <year>2001</year>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Krizhevsky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>IR</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Dropout: a simple way to prevent neural networks from overfitting</article-title>
          <source>J Mach Learn Res</source>
          <year>2014</year>
          <volume>15</volume>
          <issue>56</issue>
          <fpage>1929</fpage>
          <lpage>58</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jmlr.org/papers/v15/srivastava14a.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingma</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ba</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Adam: a method for stochastic optimization</article-title>
          <source>Proceedings of the International Conference on Learning Representations (ICLR)</source>
          <year>2015</year>
          <conf-name>International Conference on Learning Representations (ICLR)</conf-name>
          <conf-date>May 7-9, 2015</conf-date>
          <conf-loc>San Diego</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1412.6980"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klami</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vare</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Held</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>CMF: Collective Matrix Factorization</article-title>
          <source>Cran.R-Project</source>
          <year>2020</year>
          <access-date>2021-12-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/CMF/">https://cran.r-project.org/web/packages/CMF/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
