<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i5e16452</article-id>
      <article-id pub-id-type="pmid">32463370</article-id>
      <article-id pub-id-type="doi">10.2196/16452</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Review</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Review</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Challenges of Clustering Multimodal Clinical Data: Review of Applications in Asthma Subtyping</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pikoula</surname>
            <given-names>Maria</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Newby</surname>
            <given-names>Chris</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Usop</surname>
            <given-names>Kat</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Horne</surname>
            <given-names>Elsie</given-names>
          </name>
          <degrees>BSc, MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Usher Institute, Edinburgh Medical School</institution>
            <institution>University of Edinburgh</institution>
            <addr-line>Nine Edinburgh Bio Quarter</addr-line>
            <addr-line>9 Little France Road</addr-line>
            <addr-line>Edinburgh, EH16 4UX</addr-line>
            <country>United Kingdom</country>
            <phone>44 1316517887</phone>
            <fax>44 1316517887</fax>
            <email>Elsie.Horne@ed.ac.uk</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5545-7628</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Tibble</surname>
            <given-names>Holly</given-names>
          </name>
          <degrees>BSc, MPhil(Cantab)</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7169-4087</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Sheikh</surname>
            <given-names>Aziz</given-names>
          </name>
          <degrees>BSc, MSc, MBBS, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7022-3056</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Tsanas</surname>
            <given-names>Athanasios</given-names>
          </name>
          <degrees>BSc, BEng, MSc, DPhil(Oxon)</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0994-8100</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Usher Institute, Edinburgh Medical School</institution>
        <institution>University of Edinburgh</institution>
        <addr-line>Edinburgh</addr-line>
        <country>United Kingdom</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Elsie Horne <email>Elsie.Horne@ed.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>28</day>
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>5</issue>
      <elocation-id>e16452</elocation-id>
      <history>
        <date date-type="received">
          <day>30</day>
          <month>9</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>11</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>10</day>
          <month>12</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>10</day>
          <month>2</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Elsie Horne, Holly Tibble, Aziz Sheikh, Athanasios Tsanas. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 28.05.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/5/e16452/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In the current era of personalized medicine, there is increasing interest in understanding the heterogeneity in disease populations. Cluster analysis is a method commonly used to identify subtypes in heterogeneous disease populations. The clinical data used in such applications are typically multimodal, which can make the application of traditional cluster analysis methods challenging.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to review the research literature on the application of clustering multimodal clinical data to identify asthma subtypes. We assessed common problems and shortcomings in the application of cluster analysis methods in determining asthma subtypes, such that they can be brought to the attention of the research community and avoided in future studies.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We searched PubMed and Scopus bibliographic databases with terms related to cluster analysis and asthma to identify studies that applied dissimilarity-based cluster analysis methods. We recorded the analytic methods used in each study at each step of the cluster analysis process.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our literature search identified 63 studies that applied cluster analysis to multimodal clinical data to identify asthma subtypes. The features fed into the cluster algorithms were of a mixed type in 47 (75%) studies and continuous in 12 (19%), and the feature type was unclear in the remaining 4 (6%) studies. A total of 23 (37%) studies used hierarchical clustering with Ward linkage, and 22 (35%) studies used k-means clustering. Of these 45 studies, 39 had mixed-type features, but only 5 specified dissimilarity measures that could handle mixed-type features. A further 9 (14%) studies used a preclustering step to create small clusters to feed into a hierarchical method. The original sample sizes in these 9 studies ranged from 84 to 349. The remaining studies used hierarchical clustering with other linkages (n=3), medoid-based methods (n=3), spectral clustering (n=1), and multiple kernel k-means clustering (n=1), and in 1 study, the methods were unclear. Of 63 studies, 54 (86%) explained the methods used to determine the number of clusters, 24 (38%) studies tested the quality of their cluster solution, and 11 (17%) studies tested the stability of their solution. Reporting of the cluster analysis was generally poor in terms of the methods employed and their justification.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This review highlights common issues in the application of cluster analysis to multimodal clinical data to identify asthma subtypes. Some of these issues were related to the multimodal nature of the data, but many were more general issues in the application of cluster analysis. Although cluster analysis may be a useful tool for investigating disease subtypes, we recommend that future studies carefully consider the implications of clustering multimodal data, the cluster analysis process itself, and the reporting of methods to facilitate replication and interpretation of findings.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>asthma</kwd>
        <kwd>cluster analysis</kwd>
        <kwd>data mining</kwd>
        <kwd>machine learning</kwd>
        <kwd>unsupervised machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>There is mounting evidence to suggest that some disease labels are in fact <italic>umbrella terms</italic>, which encompass distinct disease subtypes with different underlying mechanisms and clinical symptom manifestations [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. This has encouraged the investigation into heterogeneity within disease populations, which has received considerable interest across diverse domains of medicine [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. There are numerous motivations for better understanding heterogeneity within disease populations, from the development of targeted therapeutics [<xref ref-type="bibr" rid="ref6">6</xref>] to the delivery of more personalized care in clinical practice [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>It is now understood that asthma is one such umbrella term used to encompass multiple diverse underlying disease symptoms and pathophysiology [<xref ref-type="bibr" rid="ref7">7</xref>]. Asthma is a common chronic condition characterized by reversible airway obstruction. The Global Burden of Disease Study 2017 estimated the global prevalence of asthma (both symptomatic and asymptomatic) to be 273 million [<xref ref-type="bibr" rid="ref8">8</xref>]. This study estimated that in 2017, there were 43 million new cases of asthma and 495,000 deaths attributed to asthma [<xref ref-type="bibr" rid="ref9">9</xref>]. Attempts to categorize asthma into distinct disease subtypes date back to the 1940s [<xref ref-type="bibr" rid="ref10">10</xref>] and are ongoing. However, the methods for discovering these underlying categories have shifted from observing clinical patterns to using data-driven approaches such as <italic>cluster analysis</italic> [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>Cluster analysis is a statistical technique used to identify subgroups in data based on multiple variables (for convenience, herein, we have used the term <italic>features</italic>). It is an <italic>unsupervised</italic> statistical learning method, and the correct number of underlying clusters is typically unknown <italic>a priori</italic> [<xref ref-type="bibr" rid="ref12">12</xref>]. The technique has found increasing use in recent years because of the practical unmet clinical need to identify subtypes of disease and stratify patients to improve health care delivery. This has been made feasible by the increasing availability of clinical datasets and the development of statistical software packages facilitating the application of algorithmic methods.</p>
        <p>Clinical datasets are often <italic>multimodal</italic>; for the purposes of this paper, we defined a multimodal dataset as a dataset that includes features from different sources, measured on different scales. For completeness and to avoid ambiguity, we clarified that the term multimodal has a different meaning in statistical literature (ie, features whose distributions have multiple modes); the use of the term in this study is aligned with clinical literature (having features from different sources). Popular methods of cluster analysis such as k-means and hierarchical clustering with the Ward method have been developed for continuous features measured on a common scale. In practice, however, many of these techniques are frequently applied to multimodal clinical datasets comprising different feature types measured on different scales, conditions that violate some of the underlying principles and assumptions made by algorithmic methods [<xref ref-type="bibr" rid="ref13">13</xref>]. Although steps can be taken to prepare multimodal clinical data for cluster analysis [<xref ref-type="bibr" rid="ref13">13</xref>], the results of a previous review suggest that these steps are rarely taken in practice [<xref ref-type="bibr" rid="ref11">11</xref>]. This previous review focused on the clinical findings of the studies and touched only briefly on the challenges of clustering multimodal data specifically.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>This review aimed to comprehensively explore whether studies applying cluster analysis to multimodal clinical data to subtype asthma are using appropriate clustering methodologies. The contribution of this study is to make recommendations for the robust application of cluster analysis to multimodal clinical data. We believed this would be of interest to the ever-growing number of asthma researchers engaging or planning to engage in disease subtyping, as well as to the wider community of researchers applying cluster techniques for the purpose of disease subtyping.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Eligibility Criteria and Search Strategy</title>
        <p>This review is reported following the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) guidelines. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows the completed PRISMA checklist.</p>
        <p>We sought to identify studies that applied cluster analysis to multimodal clinical data with the aim of identifying subtypes of asthma. One researcher (EH) searched PubMed and Scopus databases (search queries are provided in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>) to retrieve studies focusing on patients diagnosed with asthma, which included the term <italic>cluster analysis</italic> or <italic>clustering</italic>. Our search was restricted to studies published between January 1, 2008, and May 23, 2019, as Haldar et al’s study [<xref ref-type="bibr" rid="ref14">14</xref>] is widely acknowledged to be the first to apply cluster analysis to identify subtypes of asthma. Our search excluded comment articles, editorials, letters, reviews, and meta-analyses. We excluded articles that were not written in English.</p>
        <p>We excluded nonrelevant studies by first screening the abstracts, then referring to the full text where necessary. We excluded studies in which (1) none of the aims or objectives were to identify subtypes of asthma (studies looking exclusively at, eg, childhood wheeze were excluded); (2) the data were not multimodal (ie, were measured from a common source and on a common scale); and (3) none of the features were considered clinical (eg, studies concerned only with -omics data). Finally, we excluded studies that used latent class analysis or mixture models to group their data to narrow the scope of this review to methods that cluster samples based on pairwise dissimilarities. The use of latent class analysis to distinguish asthma phenotypes has been reviewed previously by Howard et al [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <boxed-text id="box1" position="float">
          <title>Search query to identify studies to include in this review.</title>
          <list list-type="bullet">
            <list-item>
              <p>The following query was inserted in PubMed on May 23, 2019:</p>
            </list-item>
          </list>
          <disp-quote>
            <p>English[Language] AND (“2008/01/01”[Date - Publication] : “2019/05/23”[Date - Publication]) AND (“cluster analysis”[Text Word] OR “clustering*”[Text Word]) AND “asthma*”[Text Word] NOT (comment[Publication Type] OR editorial[Publication Type] OR letter[Publication Type] OR review[Publication Type] OR meta-analysis[Publication Type])</p>
          </disp-quote>
          <list list-type="bullet">
            <list-item>
              <p>The following query was inserted in Scopus on May 23, 2019:</p>
            </list-item>
          </list>
          <disp-quote>
            <p>PUBYEAR &#62; 2007 AND (TITLE-ABS-KEY ( “cluster analysis” ) OR TITLE-ABS-KEY(“clustering*”)) AND TITLE-ABS-KEY (“asthma*”) AND SRCTYPE (“j”) AND DOCTYPE (“ar”) AND LANGUAGE (“English”)</p>
          </disp-quote>
        </boxed-text>
      </sec>
      <sec>
        <title>Data Extraction</title>
        <p>In total, 2 researchers (EH and HT) independently extracted information from the full text and supplementary material of each study. Information was extracted following the steps outlined in the following <italic>Cluster Analysis Steps</italic> section. The data dictionary, which provides details of all items extracted, is presented in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Cluster Analysis Steps</title>
        <p>To provide context for this review, we outlined the key steps in the application of cluster analysis to multimodal clinical data. <xref rid="figure1" ref-type="fig">Figure 1</xref> summarizes the steps in the order in which they generally occur, but as with most analytic processes, this depends on the context, and the process may be somewhat iterative.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Schematic of the typical cluster analysis steps.</p>
          </caption>
          <graphic xlink:href="medinform_v8i5e16452_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Initial Considerations</title>
          <sec>
            <title>Identify Candidate Features</title>
            <p>The first step is to identify the set of features of interest, which we referred to as <italic>candidate features</italic>. These may be identified based on previous studies or clinical input using domain expertise. In some cases, all the candidate features may be used in the cluster analysis (we referred to the features used in cluster analysis as <italic>cluster features</italic>). In other cases, formal feature selection processes may be applied to the candidate features to identify the cluster features, as covered in the <italic>Feature Selection</italic> section.</p>
          </sec>
          <sec>
            <title>Missing Data</title>
            <p>Most common cluster analysis methods use <italic>complete case analysis</italic> (ie, the cluster features have no missing entries, which, in practice, might be achieved by removing samples for which any cluster feature entry is missing). However, it may be more data efficient to develop a strategy to work around missing entries instead of discarding samples. Missing values may be handled through the calculation of dissimilarities, as described by Hastie et al [<xref ref-type="bibr" rid="ref16">16</xref>]. Alternatively, missing data could be imputed, or for categorical features, a missing category could be introduced.</p>
          </sec>
          <sec>
            <title>Sample Size</title>
            <p>Despite the widespread use of cluster analysis, at present, there is no consensus regarding the minimum sample size required to ensure stable and meaningful clustering. Dolnicar et al [<xref ref-type="bibr" rid="ref17">17</xref>] suggested that 70 samples per cluster feature is adequate, based on the findings of their simulation study. Small sample sizes may obscure the true clustering by causing the user to pick the wrong number of clusters (see the <italic>Choosing the Number of Clusters</italic> section) or by producing solutions that are neither reproducible nor stable (see the <italic>Stability</italic> and <italic>Quality</italic> subsections).</p>
          </sec>
        </sec>
        <sec>
          <title>Feature Engineering</title>
          <sec>
            <title>Feature Types</title>
            <p>The features that we may want to use in a clustering algorithm often come from multimodal clinical data. Hence, they may be of different types (eg, continuous, nominal, ordinal, binary, etc) and are likely to be measured on different scales (eg, kilogram for mass, years for age). Most dissimilarity measures and clustering algorithms assume that the features are of the same type and are measured on a common scale. These requirements can be addressed using <italic>feature encoding</italic> and <italic>feature scaling</italic>.</p>
          </sec>
          <sec>
            <title>Feature Encoding</title>
            <p>When dealing with categorical features, it is vital to consider how these are encoded (nominal, ordinal, or binary), as this determines how they are treated in the calculation of dissimilarities and in the clustering algorithm. A common approach is to encode ordinal features as integers and to encode nominal features as dummy binary features [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
          </sec>
          <sec>
            <title>Feature Scaling</title>
            <p>Feature scaling may be used to address 3 issues related to continuous features. The first is that continuous features may be measured in different units and should therefore be rescaled to bring them onto a common scale before calculating dissimilarities. The second is that continuous features measured in the same units may have different variances. In some cases, the differences in variance may be useful for clustering, but in others, these may obscure the true underlying cluster structure in the data. In the latter case, the continuous features should be rescaled. Common approaches to these 2 issues are to standardize features to have 0 mean and unit variance (referred to as <italic>z-scores</italic>) or to use range normalization techniques, for example, to scale each feature so that it is in the interval of 0 to 1.</p>
            <p>The third issue is that the features may not follow the desired probability distribution properties for further analysis (eg, having Gaussian-distributed features). This issue needs to be considered when statistical methods make distributional assumptions. Although few dissimilarity-based clustering methods make distributional assumptions, several methods involve the calculation of cluster means (eg, k-means, hierarchical clustering with the Ward linkage). The mean is a poor choice of summary statistic for a feature that is skewed (or a feature with multiple modes), so a power transformation may be advantageous as a preprocessing step when using such clustering methods.</p>
            <p>When dealing with mixed-type data, it may be necessary to scale the categorical features to avoid assigning categorical features greater weight over continuous features or vice versa. This issue is discussed in detail in the context of dissimilarity measures by Hennig and Liao [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
          </sec>
          <sec>
            <title>Dimensionality Reduction</title>
            <p>There are generally 2 motivations for reducing the dimensionality of a dataset before applying cluster analysis. First, as previously mentioned in the <italic>Sample Size</italic> subsection, datasets with a high feature-to-sample ratio may not produce stable cluster results. Second, the cluster structure may only be apparent using a subset of the information available in the data. Using all available information may introduce noise, which could obscure the true underlying cluster structure [<xref ref-type="bibr" rid="ref19">19</xref>]. There are 2 approaches to dimensionality reduction: <italic>feature selection</italic> and <italic>feature transformation</italic>.</p>
          </sec>
          <sec>
            <title>Feature Selection</title>
            <p>Feature selection involves selecting a subset of the available features for use in cluster analysis. Herein, we have referred to the features selected for the cluster analysis as <italic>cluster features</italic>.</p>
          </sec>
          <sec>
            <title>Feature Transformation</title>
            <p>Feature transformation involves combining original features to create new features. Generally, a subset of these new features is selected for inclusion in the analysis. It is beyond the scope of this review to provide in-depth details on the methods of feature transformation (also known as <italic>feature extraction</italic>); we referred to van der Maaten et al’s [<xref ref-type="bibr" rid="ref20">20</xref>] work for a comprehensive review. Here, we briefly outlined <italic>principal component analysis</italic> (PCA), which is the most commonly used method for linear data projection. PCA may be applied to <italic>p</italic> continuous, correlated features to extract <italic>m&#60;p</italic> continuous, uncorrelated features (known as <italic>principal components</italic>), each being a linear function of the original cluster features [<xref ref-type="bibr" rid="ref21">21</xref>]. Related methods include factor analysis for continuous data, <italic>multiple correspondence analysis</italic> (MCA) for categorical data [<xref ref-type="bibr" rid="ref22">22</xref>], and multiple factor analysis for mixed-type data [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
          </sec>
        </sec>
        <sec>
          <title>Cluster Analysis</title>
          <sec>
            <title>Dissimilarity Measures</title>
            <p>Model-free clustering methods rely on a <italic>dissimilarity measure</italic> to quantify how dissimilar 2 samples are from one another. Dissimilarity may also be referred to as a <italic>distance measure</italic> if it satisfies the triangle inequality. The most widely used dissimilarity measure is the squared Euclidean distance (henceforth referred to as <italic>Euclidean distance</italic>), which is intended for use with continuous features. A dissimilarity measure that can handle both categorical and continuous features is the Gower distance [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
          </sec>
          <sec>
            <title>Cluster Analysis Methods</title>
            <p>There are many different methods of cluster analysis (eg, k-means, hierarchical clustering with the Ward linkage, spectral clustering), and each method may be implemented using different algorithms. A comprehensive overview of the wide range of clustering methods can be found elsewhere [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
          </sec>
        </sec>
        <sec>
          <title>Postprocessing</title>
          <sec>
            <title>Choosing the Number of Clusters</title>
            <p>A key challenge in cluster analysis is choosing the number of clusters to present in the final solution, which is typically unknown <italic>a priori</italic>. Often, researchers use their preferred clustering methods, running them for 2 to <italic>k</italic> clusters (where <italic>k</italic> is an integer number indicating the number of clusters) and then have a strategy to determine <italic>k</italic>.</p>
            <p>Providing a detailed commentary on these strategies is beyond the scope of this review. An overview of strategies for choosing <italic>k</italic> is provided by Everitt et al [<xref ref-type="bibr" rid="ref23">23</xref>]. Graphical techniques include dendrograms (when using hierarchical clustering methods) and silhouette plots [<xref ref-type="bibr" rid="ref26">26</xref>]. An alternative approach is to choose the number of clusters that gives the most stable solution [<xref ref-type="bibr" rid="ref27">27</xref>]. In practice, a key determinant in choosing the number of clusters is often the clinical interpretation of the solutions.</p>
            <p>We highlighted the possibility that there might not be meaningful clustering of the data to form groups, and thus, the entire dataset is treated as 1 cluster. This may reflect a lack of statistical power (ie, the sample size is not sufficiently large) to determine clusters or that the investigated problem using that dataset is not amenable to clustering using the available sample size and features. Some statistics used for choosing k, such as the Gap statistic [<xref ref-type="bibr" rid="ref28">28</xref>], can be calculated for k=1. However, statistics that require the calculation of between-cluster differences or distances, such as the silhouette statistic, are not defined for k=1 [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
          </sec>
          <sec>
            <title>Stability</title>
            <p>Assessing the quality of a clustering solution produced using any cluster algorithm is challenging. Unlike supervised learning setups, there is no <italic>ground truth</italic> against which one can formally test their findings. However, there are several ways in which one can assess the integrity of their findings.</p>
            <p>Most importantly, it is crucial to assess the <italic>stability</italic> of the resulting clusters. A definition of <italic>cluster stability,</italic> given by von Luxburg [<xref ref-type="bibr" rid="ref27">27</xref>], is whether clustering different datasets sampled from the same underlying joint distribution will result in producing the same clusters. There are several ways in which this may be assessed in practice (eg, by randomly splitting a dataset into 2 or more subsets, independently feeding each subset into the cluster algorithm, and comparing the resulting clusters).</p>
          </sec>
          <sec>
            <title>Quality</title>
            <p>Beyond stability, there are numerous steps one may take to ensure the integrity of their cluster analysis findings, for example, repeating the analysis in a different cohort or at a different time point, or altering the encoding of a feature. These steps are often referred to as reproducibility testing. However, we avoided this term because it implies that we seek the exact same results, which we do not feel is reasonable in all scenarios. To extract this information from the studies in this review, 2 reviewers independently extracted details of postprocessing methods, which we felt assessed the quality of the cluster results, but did not come under stability. In our schematic and results, we referred to these methods as testing the quality of the cluster results.</p>
          </sec>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Literature Search Outcomes</title>
        <p>We identified 63 studies that used cluster analysis to identify subtypes of asthma using multimodal clinical data (<xref rid="figure2" ref-type="fig">Figure 2</xref>). One of the excluded articles satisfied our inclusion criteria but investigated 85 combinations of cluster analysis steps in a hierarchical cluster analysis of 383 children with asthma [<xref ref-type="bibr" rid="ref29">29</xref>]. We excluded this study from our review as including all 85 combinations of methods was deemed infeasible. For the 2 studies in which cluster analysis was carried out in multiple populations [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], we included only the analysis of the larger population. The characteristics of each study are presented in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Flow of studies into review.</p>
          </caption>
          <graphic xlink:href="medinform_v8i5e16452_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Initial Considerations</title>
        <sec>
          <title>Identifying Candidate Features</title>
          <p>A total of 42 (67%) studies identified candidate features based on previous studies or clinical input (relevance to asthma subtypes, avoiding clinical redundancy, and easily measured in clinical practice). The numbers of studies using each method are summarized in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        </sec>
        <sec>
          <title>Missing Data</title>
          <p>A total of 42 (67%) studies detailed their methods for dealing with missing data; the methods used are shown in <xref ref-type="table" rid="table1">Table 1</xref>. The most common method was to carry out a complete case analysis by excluding all patients with any missing cluster feature entries (35% of studies).</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Initial considerations across the asthma studies we have included in this review (N=63).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="670"/>
              <col width="300"/>
              <thead>
                <tr valign="bottom">
                  <td colspan="2">Method</td>
                  <td>Values, n (%)<sup>a</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Identifying candidate features</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Clinical intuition and understanding</td>
                  <td>33 (52)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Avoid clinical redundancy</td>
                  <td>15 (24)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Previous studies</td>
                  <td>15 (24)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Easily measured in clinical practice</td>
                  <td>8 (13)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Missing data</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Complete case analysis</td>
                  <td>22 (35)</td>
                </tr>
                <tr valign="bottom">
                  <td>
                    <break/>
                  </td>
                  <td>Features with &#62;x%<sup>b</sup> missing values removed</td>
                  <td>14 (22)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Imputed</td>
                  <td>11 (17)</td>
                </tr>
                <tr valign="bottom">
                  <td>
                    <break/>
                  </td>
                  <td>Patients with &#62;x%<sup>b</sup> missing values removed</td>
                  <td>5 (8)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>No missing data present</td>
                  <td>2 (3)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Clustering methods handle missing data</td>
                  <td>1 (2)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>One study may use multiple methods; some studies may use no methods.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>x&#62;0.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Sample Size</title>
          <p>The sample sizes for cluster analysis ranged from 40 to 3612, with a median of 195 patients. <xref rid="figure3" ref-type="fig">Figure 3</xref> presents a scatter plot of the number of patients in the cluster analysis versus the final number of cluster features. The straight line corresponds to the number of samples per feature as recommended by Dolnicar et al [<xref ref-type="bibr" rid="ref17">17</xref>]. As this estimate was derived from simulation studies using k-means as the clustering method, different markers are used for the studies which used clustering techniques other than k-means. Note that the studies that did not specify the final number of cluster features were omitted from the plot. Six studies (10%) had at least 70 times as many patients as cluster features, as recommended by Dolnicar et al [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Number of patients versus final number of cluster features. The line corresponds to the number of patients that is equal to 70 times the number of features.</p>
            </caption>
            <graphic xlink:href="medinform_v8i5e16452_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Feature Engineering</title>
        <sec>
          <title>Feature Scaling and Encoding</title>
          <p>Judging whether feature scaling and encoding were appropriate depends on the methods of cluster analysis used and vice versa. Therefore, we reported the methods of feature scaling and encoding alongside the methods of cluster analysis in <xref ref-type="table" rid="table2">Tables 2</xref>-<xref ref-type="table" rid="table4">4</xref> and <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Breakdown of methods used by studies applying hierarchical clustering with Ward's linkage (N=23).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="30"/>
              <col width="440"/>
              <col width="300"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Data type, dissimilarity, and scaling of continuous features</td>
                  <td>Categorical features encoded as binary?</td>
                  <td>Value, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Continuous</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td colspan="4">
                    <bold>Euclidean assumed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>Not detailed</td>
                  <td>N/A<sup>a</sup></td>
                  <td>1 (4)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Mixed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td colspan="4">
                    <bold>Euclidean assumed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>Scaled but method unspecified</td>
                  <td>Yes<break/>No</td>
                  <td>1 (4)<break/>1 (4)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>Scaled to lie in the interval of 0 to 1</td>
                  <td>Yes</td>
                  <td>1 (4)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>z-scores</td>
                  <td>Yes<break/>No</td>
                  <td>1 (4)<break/>1 (4)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>Not detailed</td>
                  <td>Yes<break/>No</td>
                  <td>3 (13)<break/>6 (26)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td colspan="4">
                    <bold>Euclidean stated</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>z-scores</td>
                  <td>Yes<break/>No</td>
                  <td>2 (9)<break/>1 (4)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td colspan="4">
                    <bold>Gower<sup>b</sup></bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>Gower standardization</td>
                  <td>No</td>
                  <td>3 (13)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>Scaled but method unspecified</td>
                  <td>No</td>
                  <td>1 (4)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td colspan="4">
                    <bold>treeClust</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td/>
                  <td>Not detailed</td>
                  <td>No</td>
                  <td>1 (4)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>N/A: not applicable (irrelevant for continuous features).</p>
              </fn>
              <fn id="table2fn2">
                <p><sup>b</sup>Computing the Gower coefficient normalizes the distance between feature samples by dividing by the feature range. Therefore, it is not necessary to normalize continuous features prior to computing the Gower coefficient.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Breakdown of methods used by studies applying k-means (N=22).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="30"/>
              <col width="440"/>
              <col width="300"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Data type, dissimilarity, and scaling of continuous features</td>
                  <td>Categorical features encoded as binary?</td>
                  <td>Value, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Continuous</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Euclidean assumed</bold>
                  </td>
                </tr>
                <tr valign="bottom">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>z-scores for one feature</td>
                  <td>N/A<sup>a</sup></td>
                  <td>1 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>No details</td>
                  <td>N/A</td>
                  <td>3 (14)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Euclidean stated</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>No details</td>
                  <td>N/A</td>
                  <td>1 (5)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Mixed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Euclidean assumed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>Scaled but method unspecified</td>
                  <td>No</td>
                  <td>1 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>z-scores</td>
                  <td>Yes</td>
                  <td>6 (27)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>z-scores for one feature</td>
                  <td>No</td>
                  <td>1 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>No details</td>
                  <td>Yes<break/>No</td>
                  <td>1 (5)<break/>2 (9)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Euclidean stated</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>z-scores</td>
                  <td>Yes</td>
                  <td>1 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>No details</td>
                  <td>No</td>
                  <td>1 (5)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Unclear</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Euclidean assumed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>No details</td>
                  <td>No</td>
                  <td>3 (14)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Euclidean stated</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>z-scores</td>
                  <td>No</td>
                  <td>1 (5)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>N/A: not applicable (irrelevant for continuous features).</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Breakdown of methods used by studies applying SPSS TwoStep (N=7).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="30"/>
              <col width="440"/>
              <col width="300"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Data type, dissimilarity, and scaling of continuous features</td>
                  <td>Categorical features encoded as binary?</td>
                  <td>Value, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Continuous</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Euclidean assumed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>No details</td>
                  <td>N/A<sup>a</sup></td>
                  <td>1 (14)</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Mixed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Log-likelihood assumed</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>Scaled to lie in the interval 0 to 1</td>
                  <td>Yes</td>
                  <td>1 (14)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>z-scores</td>
                  <td>No</td>
                  <td>1 (14)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>No details</td>
                  <td>Yes</td>
                  <td>2 (29)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td colspan="4">
                    <bold>Log-likelihood stated</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>Scaled but method unspecified</td>
                  <td>No</td>
                  <td>1 (14)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <break/>
                  </td>
                  <td>No details</td>
                  <td>No</td>
                  <td>1 (14)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table4fn1">
                <p><sup>a</sup>N/A: not applicable (irrelevant for continuous features).</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Univariate Feature Transformation</title>
          <p>A total of 23 (37%) studies applied univariate feature transformation to bring features closer to a normal distribution. The most common univariate feature transformation was logarithmic transformation, applied to nonnormally distributed features in 33% of studies. Lefaudeux et al [<xref ref-type="bibr" rid="ref30">30</xref>] applied the Box-Cox transformation to all features, whereas Khusial et al [<xref ref-type="bibr" rid="ref31">31</xref>] stated that data were transformed if necessary but gave no further details.</p>
        </sec>
        <sec>
          <title>Feature Selection</title>
          <p>A total of 22 (35%) studies detailed methods of feature selection to identify their cluster features. The number of features selected in the 63 studies included in this review ranged from 2 to 120, with a median of 12 features. In addition, 47 (75%) studies had mixed-type features, 12 (19%) had continuous features, and in 4 (6%) studies, the type of features was unclear. Methods for feature selection are listed in <xref ref-type="table" rid="table5">Table 5</xref>.</p>
          <p>A total of 13 (20%) studies used PCA or factor analysis for feature selection. These are not typically methods that should be used for feature selection; we defer further elaboration on the topic to the Discussion. All but one of these studies computed the components (or factors) that represent an underlying latent feature structure, then selected 1 (or in some cases multiple [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]) original feature corresponding to each component (or factor) of the latent feature structure. Just et al [<xref ref-type="bibr" rid="ref34">34</xref>] stated that they used PCA to select features according to statistical significance. As PCA does not involve the computation of statistical significance (<italic>P</italic> values), more detail would be required here to fully understand the methods used for feature selection in this paper. Pérez-Losada et al [<xref ref-type="bibr" rid="ref35">35</xref>] stated that PCA based on Euclidean distances was carried out. It is unclear whether this was an error in reporting or whether PCA was applied to the matrix of Euclidean distances between features instead of the covariance matrix. To implement the latter approach, the Euclidean distances would have to be converted to similarities. Moreover, the authors stated that PCA was used <italic>to identify key clinical components relevant to asthma diagnosis and assessment</italic>. Overall, it is not clear how the authors processed the data using PCA, and there was no justification for using Euclidean distances in that computation. Although the application of PCA leads to the computation of features (principal components) that maximally explain the (remaining) variance in the data, there is no guarantee that the resulting principal components will be highly predictive of an outcome (in this case, asthma diagnosis and assessment).</p>
          <table-wrap position="float" id="table5">
            <label>Table 5</label>
            <caption>
              <p>Feature engineering methods used in the asthma studies included in this review.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="570"/>
              <col width="400"/>
              <thead>
                <tr valign="bottom">
                  <td colspan="2">Method</td>
                  <td>Values, n (%)<sup>a</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Univariate feature transformation</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Logarithmic transformation</td>
                  <td>21 (33)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Box-Cox transformation</td>
                  <td>1 (2)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Method not explained</td>
                  <td>1 (2)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Feature selection</bold>
                  </td>
                </tr>
                <tr valign="bottom">
                  <td/>
                  <td>Factor analysis<sup>b</sup></td>
                  <td>8 (13)</td>
                </tr>
                <tr valign="bottom">
                  <td/>
                  <td>Principal component analysis<sup>b</sup></td>
                  <td>5 (8)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Avoid collinearity</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Avoid multicollinearity</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Supervised learning methods</td>
                  <td>2 (3)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Multiple correspondence analysis</td>
                  <td>1 (2)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Feature transformation</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Principal component analysis</td>
                  <td>4 (6)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Factor analysis</td>
                  <td>1 (2)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Multiple correspondence analysis</td>
                  <td>1 (2)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table5fn1">
                <p><sup>a</sup>As a percentage of all 63 studies.</p>
              </fn>
              <fn id="table5fn2">
                <p><sup>b</sup>These are not typically methods of feature selection but have been used in these studies.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <p>Three (5%) studies considered collinearity via pairwise correlations, although the exact criteria for selecting features based on this were unclear [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]. In addition, 3 (5%) studies avoided multicollinearity, but none detailed their methods for doing so [<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>].</p>
          <p>Furthermore, 2 (3%) studies selected features using statistical hypothesis tests with respect to the outcome of interest. Sakagami et al [<xref ref-type="bibr" rid="ref42">42</xref>] used mean annual decline in forced expiratory volume in 1 second as the outcome feature in a multiple regression analysis using stepwise feature selection. All features with coefficients statistically significantly different from 0 in the multiple regression model were included as cluster features. Seino et al [<xref ref-type="bibr" rid="ref43">43</xref>] grouped participants according to whether or not they had symptoms of depression. Features were selected for cluster analysis if the difference between the 2 groups (tested using a Wilcoxon rank-sum or chi-square test for continuous and categorical features, respectively) was statistically significant.</p>
        </sec>
        <sec>
          <title>Feature Transformation</title>
          <p>A total of 6 (10%) studies performed feature transformation before cluster analysis; the methods are summarized in <xref ref-type="table" rid="table5">Table 5</xref>. Of the 4 studies that used PCA for feature transformation, 3 used continuous input features [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>], whereas the fourth used mixed-type input features [<xref ref-type="bibr" rid="ref46">46</xref>]. None of the studies stated whether the covariance or correlation matrix was used as input for PCA. Only Newby et al [<xref ref-type="bibr" rid="ref45">45</xref>] specified the number of transformed features retained, and the proportion of original variance accounted for.</p>
          <p>Khusial et al [<xref ref-type="bibr" rid="ref31">31</xref>] performed factor analysis on a subset of the selected features; it is unclear whether categorical features are included in this subset. Although the resulting factors were scaled to z-scores, the authors did not provide further information regarding whether the features were scaled before factor analysis. Four factors were retained, but neither the proportion of variance explained by these factors nor a table of the factor loadings is given.</p>
          <p>Sendín-Hernández et al [<xref ref-type="bibr" rid="ref47">47</xref>] performed MCA to transform 5 continuous and 14 categorical features. They gave the proportion of variance explained by the transformed features but gave neither the number of transformed features retained nor a table of the feature loadings.</p>
        </sec>
      </sec>
      <sec>
        <title>Cluster Analysis</title>
        <sec>
          <title>Hierarchical Clustering</title>
          <p>A total of 23 (37%) studies applied hierarchical clustering with the Ward method [<xref ref-type="bibr" rid="ref48">48</xref>] as the principal clustering technique. A breakdown of the methods used by these studies is given in <xref ref-type="table" rid="table2">Table 2</xref>. One study applied these methods to continuous data, and the remaining 22 studies used mixed-type data. Three studies stated that the Euclidean distance was used, 4 used the Gower coefficient (issues with the Gower coefficient combined with the Ward method are addressed in the <italic>Discussion</italic> section), and 1 used a tree-based dissimilarity measure [<xref ref-type="bibr" rid="ref49">49</xref>]. For the remaining 15 studies, we assumed that the Euclidean distance was used. Of the 23 studies, 11 did not detail whether the features were rescaled. Of the 17 studies using the Euclidean distance with mixed-type features, 8 encoded categorical features as binary features.</p>
          <p>A total of 3 (5%) further studies (in addition to the 23 studies introduced at the start of the paragraph) applied hierarchical clustering to continuous data. Amore et al [<xref ref-type="bibr" rid="ref39">39</xref>] used the average linkage and the Euclidean distance, whereas 2 studies used hierarchical clustering but did not specify the linkage or dissimilarity measure used [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref50">50</xref>].</p>
        </sec>
        <sec>
          <title>k-Means</title>
          <p>A total of 22 (35%) studies used k-means clustering as the principal clustering technique. A breakdown of the methods used by these studies is given in <xref ref-type="table" rid="table3">Table 3</xref>. Five studies applied k-means to continuous data, and 13 studies applied it to mixed-type data. In 3 studies, the cluster features were not explicitly stated, and the data types therefore were unclear. Of the 22 studies, 4 explicitly stated that the Euclidean distance was used. As no other dissimilarity metrics were mentioned, we assumed that the Euclidean distance was used in the remaining 18 studies because it is often the default option for most algorithmic packages. Of the 22, 11 studies did not detail whether continuous features were scaled before cluster analysis. Of the 13 studies with mixed-type data, 8 encoded categorical features as binary features.</p>
        </sec>
        <sec>
          <title>Preclustering Methods</title>
          <p>When dealing with very large sample sizes, it can be advantageous to introduce a precluster step. The aim is to group samples and to use these groups or <italic>preclusters</italic> as input to a follow-on clustering algorithm (ie, using 2 steps with cascaded cluster algorithms). This step is used to reduce the computation time required to compute the cluster results.</p>
          <p>A total of 7 (11%) studies used the SPSS TwoStep clustering method [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. A breakdown of the preprocessing methods and distance measures used by these studies is given in <xref ref-type="table" rid="table4">Table 4</xref>. In the first (precluster) step, a cluster feature tree is identified. In the second step, the preclusters are merged stepwise until all clusters are in 1 cluster using the Euclidean or log-likelihood distance for continuous or mixed-type features, respectively. An advantage of the log-likelihood distance measure is that it is designed to handle mixed-type features. However, in doing so, it assumes that continuous (categorical) features follow a normal (multinomial) distribution within clusters.</p>
          <p>None of the studies in this review adequately considered the distributional assumptions made by the SPSS TwoStep method. Ruggieri et al [<xref ref-type="bibr" rid="ref53">53</xref>] acknowledged that the method assumes continuous features are normally distributed, but they did not explicitly report whether these assumptions were satisfied. Although Newby et al [<xref ref-type="bibr" rid="ref45">45</xref>] acknowledged that the method assumes cluster features are statistically independent within clusters, they only go as far as to ensure that their cluster features are uncorrelated (by applying PCA), which does not necessarily imply independence. The remaining 5 studies that used the SPSS TwoStep method did not reference distributional assumptions.</p>
          <p>Two (3%) further studies preclustered samples (Just et al [<xref ref-type="bibr" rid="ref34">34</xref>] specified k-means, and Ye et al [<xref ref-type="bibr" rid="ref54">54</xref>] did not specify the precluster method) and then applied hierarchical clustering with the Ward linkage method on the preclusters. A breakdown of the methods used by these 2 studies is given in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
        </sec>
        <sec>
          <title>k-Medoid Methods</title>
          <p>Three studies used k-medoid methods. A breakdown of the methods used by these 3 studies is given in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>. Two used k-medoids implemented by the Partition Around Medoids algorithm [<xref ref-type="bibr" rid="ref55">55</xref>]. Lefaudeux et al [<xref ref-type="bibr" rid="ref30">30</xref>] used the Euclidean distance with center-scaled continuous data, and Sekiya et al [<xref ref-type="bibr" rid="ref56">56</xref>] used the Gower metric with mixed-type data. Loza et al [<xref ref-type="bibr" rid="ref57">57</xref>] applied fuzzy partition-around-medoid clustering with the Euclidean distance to continuous data scaled with average absolute deviation.</p>
        </sec>
        <sec>
          <title>Kernel k-Means and Spectral Clustering</title>
          <p>Kernel k-means and spectral clustering are different but related methods, which may be used to identify clusters that are not linearly separable in the input feature space [<xref ref-type="bibr" rid="ref58">58</xref>]. As these methods were used by only 1 study each (Wu et al used multiple kernel k-means [<xref ref-type="bibr" rid="ref59">59</xref>], and Howrylak et al used spectral clustering [<xref ref-type="bibr" rid="ref37">37</xref>]), we do not explore them in detail in this review. However, details of the feature scaling, encoding, and distance measures used by these 2 studies are given in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
        </sec>
        <sec>
          <title>Unclear Methods</title>
          <p>Wang et al [<xref ref-type="bibr" rid="ref41">41</xref>] described a 2-step clustering method in which the first step was to carry out hierarchical clustering using the Ward method, but with the log-likelihood distance in place of the Euclidean distance. This first step was used to determine the number of clusters, which was then used in the k-means method in the second step. However, the authors cite the SPSS TwoStep method [<xref ref-type="bibr" rid="ref52">52</xref>], which is different from that described previously. It was therefore ambiguous which clustering method was applied in this study.</p>
        </sec>
      </sec>
      <sec>
        <title>Postprocessing</title>
        <sec>
          <title>Choosing the Number of Clusters</title>
          <p>A total of 54 (86%) studies explained in detail the methods used to select the number of clusters. Of these, 20 (32%) studies used more than one method for choosing the number of clusters. The maximum number of methods used was 6.</p>
          <p>A total of 27 (43%) studies used a dendrogram to choose the number of clusters to include in their study (<xref ref-type="table" rid="table6">Table 6</xref>). Note that 18 of the 22 studies that applied k-means clustering used hierarchical clustering as a first step to identify the likely number of clusters. Of these 18 studies, 11 explicitly stated that the dendrogram was used to choose the number of clusters.</p>
          <p>Of the 8 (13%) studies that specified a maximum number of clusters, the maximum number ranged between 2 and 15 clusters. Seven (11%) studies used a statistic (or multiple statistics), including the c-index [<xref ref-type="bibr" rid="ref60">60</xref>], Gap statistic [<xref ref-type="bibr" rid="ref37">37</xref>], deviation from ideal stability [<xref ref-type="bibr" rid="ref30">30</xref>], Calinski and Harabasz index [<xref ref-type="bibr" rid="ref30">30</xref>], Dunn’s partition [<xref ref-type="bibr" rid="ref57">57</xref>], cubic cluster criterion (CCC) statistic [<xref ref-type="bibr" rid="ref28">28</xref>], pseudo-F statistic [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], and pseudo-T2 statistic [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref36">36</xref>].</p>
          <p>Four studies (6%) avoided very small clusters. Approaches to this include merging 2 clusters containing 6 and 12 samples [<xref ref-type="bibr" rid="ref61">61</xref>], omitting small clusters containing 1 [<xref ref-type="bibr" rid="ref35">35</xref>] and 6 [<xref ref-type="bibr" rid="ref62">62</xref>] samples, and choosing the number such that no cluster contained less than 10% of the total samples [<xref ref-type="bibr" rid="ref63">63</xref>].</p>
        </sec>
        <sec>
          <title>Stability</title>
          <p>A total of 11 (17%) studies tested the stability of their cluster solution; the methods are detailed in <xref ref-type="table" rid="table6">Table 6</xref>. Of these, 1 study used 2 methods, and the remaining 10 each used only 1 method to test stability.</p>
        </sec>
        <sec>
          <title>Quality</title>
          <p>A total of 24 (38%) studies assessed the quality of their solution using methods beyond those assessing stability. The methods are detailed in <xref ref-type="table" rid="table6">Table 6</xref>. Of these, 3 used more than one method. The maximum number of methods used by a single study was 4.</p>
          <p>Of the 30 studies that assessed the stability or quality of their cluster analysis, 21 (70%) reported their findings. However, the reporting of these results was in many cases brief, consisting of statements such as “the clusters were shown to be stable” without providing supporting evidence.</p>
          <table-wrap position="float" id="table6">
            <label>Table 6</label>
            <caption>
              <p>Postprocessing methods used in the asthma studies included in this review.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="670"/>
              <col width="300"/>
              <thead>
                <tr valign="bottom">
                  <td colspan="2">Method</td>
                  <td>Values, n (%)<sup>a</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Choosing the number of clusters</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Dendrogram</td>
                  <td>27 (43)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Hierarchical clustering with Ward linkage</td>
                  <td>19 (30)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Specify a maximum number of clusters<sup>b</sup></td>
                  <td>8 (13)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Statistic(s)</td>
                  <td>7 (11)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Silhouette plot or average silhouette width</td>
                  <td>5 (8)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Bayesian information criterion</td>
                  <td>4 (6)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Specify a minimum size of smallest cluster<sup>b</sup></td>
                  <td>4 (6)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Previous studies</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Unclear</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Clinical interpretation</td>
                  <td>2 (3)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Scree plot</td>
                  <td>1 (2)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Stability</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Repeated in random subset</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Leave-one-out cross-validation</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Bootstrap methods</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Unclear methods</td>
                  <td>2 (3)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Train and test set</td>
                  <td>1 (2)</td>
                </tr>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Quality</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Repeated in selected subset</td>
                  <td>8 (13)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Repeated with different methods</td>
                  <td>6 (10)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Repeated with different initial configurations</td>
                  <td>5 (8)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Repeated in separate cohort</td>
                  <td>4 (6)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Repeated with altered features</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Repeated at different time point</td>
                  <td>3 (5)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Repeated with different software</td>
                  <td>1 (2)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table6fn1">
                <p><sup>a</sup>Studies may have used more than 1 method.</p>
              </fn>
              <fn id="table6fn2">
                <p><sup>b</sup>These methods were not included when calculating the number of methods used to choose the number of clusters.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We identified 63 studies that applied cluster analysis to multimodal clinical data to identify subtypes of asthma. We explored the clustering methodologies and their limitations in detail. The principal finding of this review was that the majority of the reviewed studies have flaws in the application of cluster analysis. Although some of these flaws were related to the multimodal nature of the clinical data, they extended to aspects of cluster analysis, which are agnostic of data type, such as sample size, stability, and reporting of the results.</p>
        <p>These findings build on a previous review, which identified limitations such as lack of robustness in feature selection and neglect to specify distance measures in studies using cluster analysis to contribute to our understanding of the spectrum of asthma syndrome [<xref ref-type="bibr" rid="ref11">11</xref>]. Our review investigated the methods of feature engineering more generally and identified not only neglect to specify dissimilarity measures but also instances in which the dissimilarity measure was inappropriate for the data to which it was applied. In addition, we identified issues related to sample size, cluster analysis methods, choosing the number of clusters, and testing the stability and quality of results. These issues are discussed in the following paragraphs.</p>
        <p>A widespread limitation in the reviewed studies was the small sample size. Studies had overall sample sizes as small as 40 patients, with clusters as small as 6 patients. We argue that there is limited utility in clustering data with such small sample sizes: they may result in clusters that are unstable [<xref ref-type="bibr" rid="ref64">64</xref>] and may therefore lead to selecting fewer clusters than are present in the underlying population from which the data are sampled.</p>
        <p>In the following paragraphs, we discuss the limitations of 3 of the feature selection approaches applied by the reviewed studies. The first approach was to avoid collinearity or multicollinearity or to exclude features that were considered to be <italic>clinically redundant</italic>. Although one should avoid including features that are <italic>redundant</italic> (can be completely deduced from a combination of the other cluster features), this is rarely the case. Therefore, removing features inevitably leads to loss of information. We suggest that the removal of features based on redundancy needs to be carefully considered, for example, 2 or more features (some of which may appear univariately redundant) may jointly contribute toward determining a cluster (or similarly toward the estimation of a clinical outcome in a standard supervised learning setup).</p>
        <p>The second was the use of PCA or factor analysis to select features, which has a similar motivation to the concept described earlier for discarding statistically correlated features. There are methodological justifications for the use of PCA, factor analysis, or other nonlinear embedding methods for feature transformation [<xref ref-type="bibr" rid="ref19">19</xref>]. They aim to jointly combine the original features and project them in a new feature space, which may have some useful properties, including interpretation, determining latent feature structure, and improving the clustering or statistical mapping outcomes [<xref ref-type="bibr" rid="ref16">16</xref>]. However, we suggest exercising caution toward using these methods for feature selection as described in some of the studies summarized in the Results section of this review because they were fundamentally developed toward different aims. Haldar et al used PCA for feature selection in the first publication to apply cluster analysis to identify asthma subtypes [<xref ref-type="bibr" rid="ref14">14</xref>]. It is possible that other studies used this as a point of reference for these methods, leading to the common application of these methods in the field of asthma subtyping.</p>
        <p>The third approach to feature selection was the use of statistical hypothesis tests with respect to outcomes of interest, as done in 2 studies [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. Methods in which an outcome of interest is used to guide feature selection in cluster analysis have been described previously [<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref66">66</xref>]. Although these approaches may be useful for situations in which there exists an outcome of particular interest to the clustering problem, the user should be aware of and acknowledge the assumptions made in the process. In the context of the 2 reviewed studies that used this approach, Sakagami et al did not acknowledge the linearity assumption in linear regression [<xref ref-type="bibr" rid="ref42">42</xref>], whereas Seino et al’s method does not account for potentially highly correlated features [<xref ref-type="bibr" rid="ref43">43</xref>], a concept that is key in feature selection for cluster analysis.</p>
        <p>Feature transformation was applied in only 6 studies, and the methods were generally poorly reported. As with cluster analysis, feature encoding and scaling are important considerations in feature transformation, but none of the studies gave adequate details in their methods. The results of feature transformation were also poorly reported. Although the key reason for applying feature transformation methods is to reduce the dimensionality of the dataset, only 2 [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref45">45</xref>] of the 6 studies provided details on the number of features retained. We suggest that the results of PCA, factor analysis, or MCA should include a table of component (or factor) loadings, the number of features retained, and the proportion of variance accounted for in the transformed features.</p>
        <p>Most studies explicitly stated the clustering method that they used but were less explicit regarding the preprocessing steps and choice of dissimilarity measure. Hastie et al [<xref ref-type="bibr" rid="ref16">16</xref>] state, “Specifying an appropriate dissimilarity measure is far more important in obtaining success with clustering than choice of clustering algorithm.”</p>
        <p>We expand on this statement, further adding that preprocessing steps such as feature scaling and feature encoding are also more important in obtaining success than the choice of clustering algorithm. This is in line with the conclusions of Prosperi et al, who demonstrated that clustering using different feature sets and encodings in asthma datasets can lead to different cluster solutions [<xref ref-type="bibr" rid="ref29">29</xref>]. Both preprocessing steps and dissimilarity measures, along with their relation to clustering algorithms, have been given poor consideration in clustering applications in asthma, as discussed in the following 3 paragraphs.</p>
        <p>First, the Euclidean distance was used with mixed-type data in over half of the studies (54%). Although the Euclidean distance is intended for use with continuous data, problems associated with applying it to mixed-type data may be mitigated by carefully considering feature scaling and feature encoding. However, in our review, we found that many studies did not specify their methods for rescaling, and many studies included ordinal and nominal categorical features but did not specify how these would be treated when calculating the Euclidean distances. The lack of consideration of feature scaling and encoding in these cases may have resulted in assigning an unintended weight structure to the cluster features.</p>
        <p>Second, 4 studies used the Gower coefficient in hierarchical clustering with Ward linkage [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref67">67</xref>-<xref ref-type="bibr" rid="ref69">69</xref>], and 1 used tree-based distances [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref70">70</xref>]. These studies should be given some credit for using dissimilarities that can handle mixed-type data. However, the application of hierarchical clustering with Ward linkage relies on the properties of the Euclidean distance in the computations. These properties do not hold for the Gower coefficient, and hence, errors are propagated at each level of the hierarchy. An example that demonstrates this issue is given in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p>
        <p>A final point in the use of k-means and hierarchical clustering using the Ward method with mixed cluster features is that the theory underpinning these methods involves the calculation of cluster means. The mean is not an appropriate summary statistic for categorical features, which are more typically summarized by the mode. For this reason, we suggest that k-medoids may be a more appropriate method for mixed-type features used in clustering. Instead of computing each cluster’s mean (as with hierarchical clustering using Ward’s method and k-means), k-medoids compute each cluster’s medoid, defined as the sample in the cluster for which the average dissimilarity to all other samples in the cluster is minimized [<xref ref-type="bibr" rid="ref55">55</xref>]. In addition, k-medoids do not rely on the properties of the Euclidean distance in the computations, thus avoiding the issue described in the previous paragraph. Despite these advantages, only 2 studies in this review used k-medoids [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref56">56</xref>].</p>
        <p>The SPSS TwoStep method was used in 7 of the 63 studies investigated here. We see 2 key limitations with the application of this method across the reviewed studies. First, none of the studies gave adequate consideration to the distributional assumptions made when using the log-likelihood distance, and most did not mention the assumptions at all. Second, this method is designed for clustering several millions of samples with many features within an acceptable time and makes a key compromise in doing so [<xref ref-type="bibr" rid="ref52">52</xref>]. This compromise is that the data are not stored in the main memory but are read sequentially, hence making the solution sensitive to the ordering of the data. None of the studies acknowledged this inherent shortcoming, nor did they confirm that their data were in a random order. Perhaps, more concerningly, the studies that applied these methods actually had very small datasets (range 84-349 samples) that could easily be stored, therefore making other standard techniques more appropriate. In our view, this compromise was therefore unnecessary.</p>
        <p>Only 1 study [<xref ref-type="bibr" rid="ref57">57</xref>] used a method that obtains a <italic>fuzzy</italic> cluster solution (in which a patient may be assigned a membership value to multiple clusters), as opposed to a <italic>hard</italic> cluster solution (in which each patient is assigned to a single cluster) [<xref ref-type="bibr" rid="ref23">23</xref>]. A fuzzy cluster solution can indicate where a patient membership value is similar across multiple clusters, whereas this information is lost (or leads to lack of stability) in a hard cluster solution. Owing to the noisy nature of clinical data and the clinical complexity of grouping patients into distinct groups, we suggest that fuzzy cluster solutions may be more appropriate than hard cluster solutions in the reviewed applications in asthma. However, it is important to acknowledge that there are added challenges in the interpretation and communication of fuzzy cluster solutions and that the methods may be more computationally intensive [<xref ref-type="bibr" rid="ref71">71</xref>].</p>
        <p>Selecting the number of clusters can be challenging and depends largely on the context of the application. In the case of the reviewed applications in asthma, the <italic>true</italic> number of clusters is unknown, and the analyses are exploratory. Although 86% of the reviewed studies gave some details regarding their methods for choosing the number of clusters (<italic>k</italic>), they were generally poorly reported. The most popular approach was the dendrogram, but only Labor et al [<xref ref-type="bibr" rid="ref72">72</xref>] specified their criteria for cutting the dendrogram. In 14 studies, the dendrogram was the only method mentioned. We suggest that more than one method should be used to select the number of clusters to validate this decision.</p>
        <p>Our review shows that studies rarely tested the stability and quality of their results, with a particular lack of emphasis on stability. This is concerning, as many studies use methods such as k-means, which reach local minima, and apply them to small sample sizes, thus increasing the risk of obtaining unstable results. We argue that because of the unsupervised nature of cluster analysis, testing the stability and quality of the results should be a key theme and would like to urge researchers and peer reviewers in this research field to carefully consider these aspects. However, we do appreciate that assessing the stability and quality of a solution in the absence of <italic>ground truth</italic> is challenging and that there are currently no well-established frameworks for doing so [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <p>Although this review focused on applications in subtyping asthma, the identified issues have been found in studies using cluster analysis to subtype other diseases. For example, recent studies in autism [<xref ref-type="bibr" rid="ref73">73</xref>] and hypersomnolence [<xref ref-type="bibr" rid="ref74">74</xref>] have applied cluster analysis to very small samples (55 and 17 patients, respectively). A recent study on Parkinson disease [<xref ref-type="bibr" rid="ref75">75</xref>] stated in the main text that a <italic>model-based</italic> cluster analysis method was used, whereas the supplementary materials revealed that the method was in fact k-means, which is not model-based. In addition, supplementary materials listed 3 methods for choosing the number of clusters (CCC, pseudo-F, and R-squared statistics) but did not present the results from these 3 methods anywhere in the main text or supplementary materials. These findings demonstrate the widespread nature of the issues that this review has highlighted, and that the issues are not restricted to asthma-related studies.</p>
        <p>For a recent example of a well-considered and well-reported application of cluster analysis to multimodal clinical data, we refer the reader to Pikoula et al’s study of Chronic Obstructive Pulmonary Disease subtypes [<xref ref-type="bibr" rid="ref76">76</xref>]. The main text and supplementary materials provide a transparent report of the methodology with respect to feature engineering and cluster analysis methods. In particular, Pikoula et al performed a rigorous assessment of the stability, reproducibility, and sensitivity of the resulting clusters, which could be used as a framework for future studies. The results that were key to the study’s conclusions (eg, MCA feature loadings, silhouette plots, results from stability, reproducibility, and sensitivity analyses) are correctly reported in the manuscript, enabling readers to have a thorough understanding of the study’s findings.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The literature search presented in this study is comprehensive but practically cannot be exhaustive. We restricted the search to articles that included the terms <italic>cluster analysis</italic> or <italic>clustering*</italic>. Although it is not strictly speaking correct to do so, some studies in the medical literature use the term <italic>classification</italic> to refer to cluster analysis, often confusing the 2 terms and sometimes using them almost interchangeably (for example, see the studies by Just et al [<xref ref-type="bibr" rid="ref34">34</xref>] and Kim et al [<xref ref-type="bibr" rid="ref46">46</xref>]). Widening the search to identify studies that use the term <italic>classification</italic> would have greatly increased the initial number of results of the PubMed search, but we suspect that the increase in the number of eligible studies for cluster analysis identified would have been small. Similarly, the terms <italic>latent class analysis</italic> and <italic>mixture model analysis</italic> might sometimes be erroneously used to refer to cluster analysis; we clarify that these terms were not included in our search strategy. As this is not a systematic review, we feel that our search criteria are fully sufficient for this study’s purposes.</p>
        <p>We did not fully explore multiple kernel k-means [<xref ref-type="bibr" rid="ref77">77</xref>] or spectral clustering [<xref ref-type="bibr" rid="ref78">78</xref>] methods, each used by 1 study in this review. As with all other cluster analysis methods mentioned here, careful consideration must be taken when applying these methods to mixed-type data. There are numerous other considerations that are important to these methods, such as the choice of kernel function, but these are beyond the scope of this review.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This review highlights a number of issues in previous applications of cluster analysis to multimodal clinical data in asthma. We make the following key recommendations based on these findings:</p>
        <list list-type="bullet">
          <list-item>
            <p>Careful consideration should be given to the preprocessing of multimodal clinical data and how the scaling and encoding of features may affect their weighting in the analysis.</p>
          </list-item>
          <list-item>
            <p>The choices of dissimilarity measure and cluster analysis method depend on one another as well as on the scaling and encoding of the data. Certain combinations of these data analytics components may be incompatible and give unreliable results.</p>
          </list-item>
          <list-item>
            <p>The stability and quality of the cluster results should be thoroughly evaluated.</p>
          </list-item>
        </list>
        <p>The abovementioned recommendations focus on the application of cluster analysis, but we put similar emphasis on the clear reporting of each of the abovementioned points, as this was also found to be lacking in the reviewed papers.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses checklist.</p>
        <media xlink:href="medinform_v8i5e16452_app1.doc" xlink:title="DOC File , 64 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Data dictionary.</p>
        <media xlink:href="medinform_v8i5e16452_app2.docx" xlink:title="DOCX File , 22 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Study characteristics.</p>
        <media xlink:href="medinform_v8i5e16452_app3.docx" xlink:title="DOCX File , 85 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Breakdown of methods used by the 11 studies that did not use the three most common clustering methods.</p>
        <media xlink:href="medinform_v8i5e16452_app4.docx" xlink:title="DOCX File , 16 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Illustrative example of the use of Gower coefficient with hierarchical clustering and Ward linkage.</p>
        <media xlink:href="medinform_v8i5e16452_app5.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CCC</term>
          <def>
            <p>cubic cluster criterion</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">HDR</term>
          <def>
            <p>Health Data Research</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">MCA</term>
          <def>
            <p>multiple correspondence analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">PCA</term>
          <def>
            <p>principal component analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PRISMA</term>
          <def>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by the Health Data Research, United Kingdom (HDR UK), which receives funding from HDR UK Ltd (HDR-5012) funded by the UK Medical Research Council, Engineering and Physical Sciences Research Council, Economic and Social Research Council, Department of Health and Social Care (England), Chief Scientist Office of the Scottish Government Health and Social Care Directorates, Health and Social Care Research and Development Division (Welsh Government), Public Health Agency (Northern Ireland), the British Heart Foundation, and the Wellcome Trust and by the Asthma UK Centre for Applied Research, which is funded by Asthma UK. The funders had no role in the study or the decision to submit this work to be considered for publication.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>EH was responsible for conducting the study. EH conducted the identification of articles and screened them for eligibility. EH and HT independently extracted data according to the described methodology and synthesized the findings. EH wrote up the first draft of the manuscript, and AT, AS, and HT contributed to the final version.</p>
      </fn>
      <fn fn-type="conflict">
        <p>AS is supported by a research grant from the Asthma UK Centre for Applied Research. All other authors have no conflict of interest pertaining to this study to declare.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lawton</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ben-Shlomo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>May</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Baig</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Barber</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Swallow</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Malek</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Grosset</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Bajaj</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Barker</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Burn</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Foltynie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>NW</given-names>
            </name>
            <name name-style="western">
              <surname>Grosset</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>MT</given-names>
            </name>
          </person-group>
          <article-title>Developing and validating Parkinson's disease subtypes and their motor and cognitive progression</article-title>
          <source>J Neurol Neurosurg Psychiatry</source>
          <year>2018</year>
          <month>12</month>
          <volume>89</volume>
          <issue>12</issue>
          <fpage>1279</fpage>
          <lpage>87</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jnnp.bmj.com/cgi/pmidlookup?view=long&#38;pmid=30464029"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jnnp-2018-318337</pub-id>
          <pub-id pub-id-type="medline">30464029</pub-id>
          <pub-id pub-id-type="pii">jnnp-2018-318337</pub-id>
          <pub-id pub-id-type="pmcid">PMC6288789</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ousley</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Cermak</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Autism spectrum disorder: defining dimensions and subgroups</article-title>
          <source>Curr Dev Disord Rep</source>
          <year>2014</year>
          <month>03</month>
          <day>1</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>20</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25072016"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40474-013-0003-1</pub-id>
          <pub-id pub-id-type="medline">25072016</pub-id>
          <pub-id pub-id-type="pmcid">PMC4111262</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Haritunians</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Landers</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Potdar</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schumm</surname>
              <given-names>LP</given-names>
            </name>
            <name name-style="western">
              <surname>Daly</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Targan</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>McGovern</surname>
              <given-names>DP</given-names>
            </name>
          </person-group>
          <article-title>Late-onset Crohn's disease is a subgroup distinct in genetic and behavioral risk factors with UC-like characteristics</article-title>
          <source>Inflamm Bowel Dis</source>
          <year>2018</year>
          <month>10</month>
          <day>12</day>
          <volume>24</volume>
          <issue>11</issue>
          <fpage>2413</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29860388"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ibd/izy148</pub-id>
          <pub-id pub-id-type="medline">29860388</pub-id>
          <pub-id pub-id-type="pii">5025805</pub-id>
          <pub-id pub-id-type="pmcid">PMC6195175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bowman</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Flanagan</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Hattersley</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>Future roadmaps for precision medicine applied to diabetes: rising to the challenge of heterogeneity</article-title>
          <source>J Diabetes Res</source>
          <year>2018</year>
          <volume>2018</volume>
          <fpage>3061620</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2018/3061620"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2018/3061620</pub-id>
          <pub-id pub-id-type="medline">30599002</pub-id>
          <pub-id pub-id-type="pmcid">PMC6288579</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sidhaye</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Nishida</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>FJ</given-names>
            </name>
          </person-group>
          <article-title>Precision medicine in COPD: where are we and where do we need to go?</article-title>
          <source>Eur Respir Rev</source>
          <year>2018</year>
          <month>09</month>
          <day>30</day>
          <volume>27</volume>
          <issue>149</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://err.ersjournals.com/cgi/pmidlookup?view=long&#38;pmid=30068688"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/16000617.0022-2018</pub-id>
          <pub-id pub-id-type="medline">30068688</pub-id>
          <pub-id pub-id-type="pii">27/149/180022</pub-id>
          <pub-id pub-id-type="pmcid">PMC6156790</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Späth</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Marjani</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Characterization of cancer genomic heterogeneity by next-generation sequencing advances precision medicine in cancer treatment</article-title>
          <source>Precis Clin Med</source>
          <year>2018</year>
          <month>06</month>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>48</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30687561"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/pcmedi/pby007</pub-id>
          <pub-id pub-id-type="medline">30687561</pub-id>
          <pub-id pub-id-type="pii">pby007</pub-id>
          <pub-id pub-id-type="pmcid">PMC6333046</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pavord</surname>
              <given-names>ID</given-names>
            </name>
            <name name-style="western">
              <surname>Beasley</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Agusti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Bel</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Brusselle</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cullinan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Custovic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ducharme</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Fahy</surname>
              <given-names>JV</given-names>
            </name>
            <name name-style="western">
              <surname>Frey</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Heaney</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Holt</surname>
              <given-names>PG</given-names>
            </name>
            <name name-style="western">
              <surname>Humbert</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lloyd</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>FD</given-names>
            </name>
            <name name-style="western">
              <surname>Sly</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>von Mutius</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wenzel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zar</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bush</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>After asthma: redefining airways diseases</article-title>
          <source>Lancet</source>
          <year>2018</year>
          <month>01</month>
          <day>27</day>
          <volume>391</volume>
          <issue>10118</issue>
          <fpage>350</fpage>
          <lpage>400</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(17)30879-6</pub-id>
          <pub-id pub-id-type="medline">28911920</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(17)30879-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>GBD 2017 Disease and Injury Incidence and Prevalence Collaborators</collab>
          </person-group>
          <article-title>Global, regional, and national incidence, prevalence, and years lived with disability for 354 diseases and injuries for 195 countries and territories, 1990-2017: a systematic analysis for the Global Burden of Disease Study 2017</article-title>
          <source>Lancet</source>
          <year>2018</year>
          <month>11</month>
          <day>10</day>
          <volume>392</volume>
          <issue>10159</issue>
          <fpage>1789</fpage>
          <lpage>858</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(18)32279-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(18)32279-7</pub-id>
          <pub-id pub-id-type="medline">30496104</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(18)32279-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6227754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>GBD 2017 Causes of Death Collaborators</collab>
          </person-group>
          <article-title>Global, regional, and national age-sex-specific mortality for 282 causes of death in 195 countries and territories, 1980-2017: a systematic analysis for the Global Burden of Disease Study 2017</article-title>
          <source>Lancet</source>
          <year>2018</year>
          <month>11</month>
          <day>10</day>
          <volume>392</volume>
          <issue>10159</issue>
          <fpage>1736</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(18)32203-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(18)32203-7</pub-id>
          <pub-id pub-id-type="medline">30496103</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(18)32203-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6227606</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rackemann</surname>
              <given-names>FM</given-names>
            </name>
          </person-group>
          <article-title>A working classification of asthma</article-title>
          <source>Am J Med</source>
          <year>1947</year>
          <month>11</month>
          <volume>3</volume>
          <issue>5</issue>
          <fpage>601</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/0002-9343(47)90204-0</pub-id>
          <pub-id pub-id-type="medline">20269240</pub-id>
          <pub-id pub-id-type="pii">0002-9343(47)90204-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deliu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sperrin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Belgrave</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Custovic</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Identification of asthma subtypes using clustering methodologies</article-title>
          <source>Pulm Ther</source>
          <year>2016</year>
          <volume>2</volume>
          <fpage>19</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27512723"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s41030-016-0017-z</pub-id>
          <pub-id pub-id-type="medline">27512723</pub-id>
          <pub-id pub-id-type="pii">17</pub-id>
          <pub-id pub-id-type="pmcid">PMC4959136</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>The Elements of Statistical Learning: Data Mining, Inference, and Prediction</source>
          <year>2009</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hennig</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>TF</given-names>
            </name>
          </person-group>
          <article-title>How to find an appropriate clustering for mixed-type variables with application to socio-economic stratification</article-title>
          <source>J R Stat Soc Ser C Appl Stat</source>
          <year>2013</year>
          <volume>62</volume>
          <issue>3</issue>
          <fpage>309</fpage>
          <lpage>69</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1467-9876.2012.01066.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haldar</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pavord</surname>
              <given-names>ID</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brightling</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Wardlaw</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Cluster analysis and clinical asthma phenotypes</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2008</year>
          <month>08</month>
          <day>1</day>
          <volume>178</volume>
          <issue>3</issue>
          <fpage>218</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18480428"/>
          </comment>
          <pub-id pub-id-type="doi">10.1164/rccm.200711-1754OC</pub-id>
          <pub-id pub-id-type="medline">18480428</pub-id>
          <pub-id pub-id-type="pmcid">PMC3992366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Howard</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rattray</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prosperi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Custovic</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Distinguishing asthma phenotypes using machine learning approaches</article-title>
          <source>Curr Allergy Asthma Rep</source>
          <year>2015</year>
          <month>07</month>
          <volume>15</volume>
          <issue>7</issue>
          <fpage>38</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26143394"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11882-015-0542-0</pub-id>
          <pub-id pub-id-type="medline">26143394</pub-id>
          <pub-id pub-id-type="pmcid">PMC4586004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Unsupervised learning</article-title>
          <source>The Elements of Statistical Learning: Data Mining, Inference, and Prediction</source>
          <year>2009</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>485</fpage>
          <lpage>552</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dolnicar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Grün</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Leisch</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Required sample sizes for data-driven market segmentation analyses in tourism</article-title>
          <source>J Travel Res</source>
          <year>2014</year>
          <volume>53</volume>
          <issue>3</issue>
          <fpage>296</fpage>
          <lpage>306</lpage>
          <pub-id pub-id-type="doi">10.1177/0047287513496475</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Overview of supervised learning</article-title>
          <source>The Elements of Statistical Learning: Data Mining, Inference, and Prediction (chapter 2)</source>
          <year>2009</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>9</fpage>
          <lpage>38</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ben-Hur</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Guyon</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Detecting stable clusters using principal component analysis</article-title>
          <source>Methods Mol Biol</source>
          <year>2003</year>
          <volume>224</volume>
          <fpage>159</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.1385/1-59259-364-X:159</pub-id>
          <pub-id pub-id-type="medline">12710673</pub-id>
          <pub-id pub-id-type="pii">1-59259-364-X-159</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Maaten</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Postma</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>van den Herik</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Dimensionality reduction: a comparative review</article-title>
          <source>J Mach Learn Res</source>
          <year>2009</year>
          <volume>10</volume>
          <fpage>66</fpage>
          <lpage>71</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://lvdmaaten.github.io/publications/papers/TR_Dimensionality_Reduction_Review_2009.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <source>A User's Guide to Principal Components</source>
          <year>1991</year>
          <publisher-loc>Jersey City, USA</publisher-loc>
          <publisher-name>Wiley-Blackwell</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pagès</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Multiple correspondence analysis</article-title>
          <source>Multiple Factor Analysis by Example Using R</source>
          <year>2018</year>
          <publisher-loc>Boca Raton, Florida</publisher-loc>
          <publisher-name>Chapman and Hall/CRC</publisher-name>
          <fpage>39</fpage>
          <lpage>66</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pagès</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Multiple factor analysis and procrustes analysis</article-title>
          <source>Multiple Factor Analysis by Example Using R</source>
          <year>2018</year>
          <publisher-loc>Boca Raton, Florida</publisher-loc>
          <publisher-name>Chapman and Hall/CRC</publisher-name>
          <fpage>189</fpage>
          <lpage>208</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gower</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>A general coefficient of similarity and some of its properties</article-title>
          <source>Biometrics</source>
          <year>1971</year>
          <month>12</month>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>857</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.2307/2528823</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Everitt</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Landau</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Leese</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Cluster Analysis. Fifth Edition</source>
          <year>2011</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Wiley Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rousseeuw</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Silhouettes: a graphical aid to the interpretation and validation of cluster analysis</article-title>
          <source>J Comput Appl Math</source>
          <year>1987</year>
          <month>11</month>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>53</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1016/0377-0427(87)90125-7</pub-id>
          <pub-id pub-id-type="pii">0377-0427(87)90125-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>von Luxburg</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Clustering stability: an overview</article-title>
          <source>Found Trends Mach Learn</source>
          <year>2010</year>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>235</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="doi">10.1561/2200000008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schatz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Zeiger</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Dorenbaum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chipps</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Haselkorn</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Phenotypes determined by cluster analysis in severe or difficult-to-treat asthma</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2014</year>
          <month>06</month>
          <volume>133</volume>
          <issue>6</issue>
          <fpage>1549</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2013.10.006</pub-id>
          <pub-id pub-id-type="medline">24315502</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(13)01554-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prosperi</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Sahiner</surname>
              <given-names>UM</given-names>
            </name>
            <name name-style="western">
              <surname>Belgrave</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sackesen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Buchan</surname>
              <given-names>IE</given-names>
            </name>
            <name name-style="western">
              <surname>Simpson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yavuz</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Kalayci</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Custovic</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Challenges in identifying asthma subgroups using unsupervised statistical learning techniques</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2013</year>
          <month>12</month>
          <day>1</day>
          <volume>188</volume>
          <issue>11</issue>
          <fpage>1303</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24180417"/>
          </comment>
          <pub-id pub-id-type="doi">10.1164/rccm.201304-0694OC</pub-id>
          <pub-id pub-id-type="medline">24180417</pub-id>
          <pub-id pub-id-type="pmcid">PMC3919072</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lefaudeux</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>de Meulder</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Loza</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Peffer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rowe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baribaud</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bansal</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Lutter</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sousa</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Corfield</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pandis</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bakke</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Caruso</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chanez</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dahlén</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Fleming</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fowler</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Horvath</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Krug</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Montuschi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sanak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sandstrom</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sterk</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Adcock</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Djukanovic</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Auffray</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>KF</given-names>
            </name>
            <collab>U-BIOPRED Study Group</collab>
          </person-group>
          <article-title>U-BIOPRED clinical adult asthma clusters linked to a subset of sputum omics</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2017</year>
          <month>06</month>
          <volume>139</volume>
          <issue>6</issue>
          <fpage>1797</fpage>
          <lpage>807</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2016.08.048</pub-id>
          <pub-id pub-id-type="medline">27773852</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(16)31185-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khusial</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sont</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Loijmans</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Snoeck-Stroband</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Assendelft</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Schermer</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Honkoop</surname>
              <given-names>PJ</given-names>
            </name>
            <collab>ACCURATE Study Group</collab>
          </person-group>
          <article-title>Longitudinal outcomes of different asthma phenotypes in primary care, an observational study</article-title>
          <source>NPJ Prim Care Respir Med</source>
          <year>2017</year>
          <month>10</month>
          <day>3</day>
          <volume>27</volume>
          <issue>1</issue>
          <fpage>55</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28974677"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41533-017-0057-3</pub-id>
          <pub-id pub-id-type="medline">28974677</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41533-017-0057-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5626703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hsiao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Sex-specific asthma phenotypes, inflammatory patterns, and asthma control in a cluster analysis</article-title>
          <source>J Allergy Clin Immunol Pract</source>
          <year>2019</year>
          <month>02</month>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>556</fpage>
          <lpage>67.e15</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaip.2018.08.008</pub-id>
          <pub-id pub-id-type="medline">30170162</pub-id>
          <pub-id pub-id-type="pii">S2213-2198(18)30529-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Busse</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Jarjour</surname>
              <given-names>NN</given-names>
            </name>
            <name name-style="western">
              <surname>Wenzel</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Meyers</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Bleecker</surname>
              <given-names>ER</given-names>
            </name>
            <collab>National Heart, Lung, Blood Institute's Severe Asthma Research Program</collab>
          </person-group>
          <article-title>Sputum neutrophil counts are associated with more severe asthma phenotypes using cluster analysis</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2014</year>
          <month>06</month>
          <volume>133</volume>
          <issue>6</issue>
          <fpage>1557</fpage>
          <lpage>63.e5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24332216"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2013.10.011</pub-id>
          <pub-id pub-id-type="medline">24332216</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(13)01563-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4040309</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Just</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gouvis-Echraghi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rouve</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wanin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Moreau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Annesi-Maesano</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Two novel, severe asthma phenotypes identified during childhood using a clustering approach</article-title>
          <source>Eur Respir J</source>
          <year>2012</year>
          <month>07</month>
          <volume>40</volume>
          <issue>1</issue>
          <fpage>55</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://erj.ersjournals.com/cgi/pmidlookup?view=long&#38;pmid=22267763"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/09031936.00123411</pub-id>
          <pub-id pub-id-type="medline">22267763</pub-id>
          <pub-id pub-id-type="pii">09031936.00123411</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pérez-Losada</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Authelet</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hoptay</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Kwak</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Crandall</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Freishtat</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Pediatric asthma comprises different phenotypic clusters with unique nasal microbiotas</article-title>
          <source>Microbiome</source>
          <year>2018</year>
          <month>10</month>
          <day>4</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>179</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-018-0564-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s40168-018-0564-7</pub-id>
          <pub-id pub-id-type="medline">30286807</pub-id>
          <pub-id pub-id-type="pii">10.1186/s40168-018-0564-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6172741</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wathen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Altaye</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mersha</surname>
              <given-names>TB</given-names>
            </name>
          </person-group>
          <article-title>African ancestry is associated with cluster-based childhood asthma subphenotypes</article-title>
          <source>BMC Med Genomics</source>
          <year>2018</year>
          <month>05</month>
          <day>31</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>51</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-018-0367-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12920-018-0367-5</pub-id>
          <pub-id pub-id-type="medline">29855310</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12920-018-0367-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5984446</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Howrylak</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Fuhlbrigge</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Strunk</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Zeiger</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Raby</surname>
              <given-names>BA</given-names>
            </name>
            <collab>Childhood Asthma Management Program Research Group</collab>
          </person-group>
          <article-title>Classification of childhood asthma phenotypes and long-term clinical responses to inhaled anti-inflammatory medications</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2014</year>
          <month>05</month>
          <volume>133</volume>
          <issue>5</issue>
          <fpage>1289</fpage>
          <lpage>300, 1300.e1</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24892144"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2014.02.006</pub-id>
          <pub-id pub-id-type="medline">24892144</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(14)00203-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4047642</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loureiro</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Sa-Couto</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Todo-Bom</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Cluster analysis in phenotyping a Portuguese population</article-title>
          <source>Rev Port Pneumol (2006)</source>
          <year>2015</year>
          <month>09</month>
          <day>3</day>
          <comment>[Online ahead of print]</comment>
          <pub-id pub-id-type="doi">10.1016/j.rppnen.2015.07.006</pub-id>
          <pub-id pub-id-type="medline">26344641</pub-id>
          <pub-id pub-id-type="pii">S2173-5115(15)00138-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amore</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Antonucci</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bettini</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Boracchia</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Innamorati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Parisoli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pisi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ramponi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chetta</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Disease control in patients with asthma is associated with alexithymia but not with depression or anxiety</article-title>
          <source>Behav Med</source>
          <year>2013</year>
          <volume>39</volume>
          <issue>4</issue>
          <fpage>138</fpage>
          <lpage>45</lpage>
          <pub-id pub-id-type="doi">10.1080/08964289.2013.818931</pub-id>
          <pub-id pub-id-type="medline">24236811</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cabral</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Sousa</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Mendes</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Carvalho</surname>
              <given-names>CR</given-names>
            </name>
          </person-group>
          <article-title>Phenotypes of asthma in low-income children and adolescents: cluster analysis</article-title>
          <source>J Bras Pneumol</source>
          <year>2017</year>
          <volume>43</volume>
          <issue>1</issue>
          <fpage>44</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.scielo.br/scielo.php?script=sci_arttext&#38;pid=S1806-37132017005001102&#38;lng=en&#38;nrm=iso&#38;tlng=en"/>
          </comment>
          <pub-id pub-id-type="doi">10.1590/S1806-37562016000000039</pub-id>
          <pub-id pub-id-type="medline">28125150</pub-id>
          <pub-id pub-id-type="pii">S1806-37132017005001102</pub-id>
          <pub-id pub-id-type="pmcid">PMC5790664</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>HP</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>PG</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Identification and validation of asthma phenotypes in Chinese population using cluster analysis</article-title>
          <source>Ann Allergy Asthma Immunol</source>
          <year>2017</year>
          <month>10</month>
          <volume>119</volume>
          <issue>4</issue>
          <fpage>324</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1016/j.anai.2017.07.016</pub-id>
          <pub-id pub-id-type="medline">28866310</pub-id>
          <pub-id pub-id-type="pii">S1081-1206(17)30564-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sakagami</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hasegawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Koya</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Furukawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kawakami</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kimura</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hoshino</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sakamoto</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shima</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kagamu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Suzuki</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Narita</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Cluster analysis identifies characteristic phenotypes of asthma with accelerated lung function decline</article-title>
          <source>J Asthma</source>
          <year>2014</year>
          <month>03</month>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>113</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.3109/02770903.2013.852201</pub-id>
          <pub-id pub-id-type="medline">24102534</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seino</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hasegawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Koya</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sakagami</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mashima</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Muramatsu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Muramatsu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Suzuki</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kikuchi</surname>
              <given-names>T</given-names>
            </name>
            <collab>Niigata Respiratory Disease Study Group</collab>
          </person-group>
          <article-title>A cluster analysis of bronchial asthma patients with depressive symptoms</article-title>
          <source>Intern Med</source>
          <year>2018</year>
          <month>07</month>
          <day>15</day>
          <volume>57</volume>
          <issue>14</issue>
          <fpage>1967</fpage>
          <lpage>75</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.2169/internalmedicine.9073-17"/>
          </comment>
          <pub-id pub-id-type="doi">10.2169/internalmedicine.9073-17</pub-id>
          <pub-id pub-id-type="medline">29526967</pub-id>
          <pub-id pub-id-type="pmcid">PMC6096028</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agache</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Strasser</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Klenk</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agache</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Farine</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ciobanu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Groenen</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Akdis</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>Serum IL-5 and IL-13 consistently serve as the best predictors for the blood eosinophilia phenotype in adult asthmatics</article-title>
          <source>Allergy</source>
          <year>2016</year>
          <month>08</month>
          <volume>71</volume>
          <issue>8</issue>
          <fpage>1192</fpage>
          <lpage>202</lpage>
          <pub-id pub-id-type="doi">10.1111/all.12906</pub-id>
          <pub-id pub-id-type="medline">27060452</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Newby</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Heaney</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Menzies-Gow</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Niven</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Mansur</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bucknall</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhuri</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Burton</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Brightling</surname>
              <given-names>C</given-names>
            </name>
            <collab>British Thoracic Society Severe Refractory Asthma Network</collab>
          </person-group>
          <article-title>Statistical cluster analysis of the British Thoracic Society Severe refractory Asthma Registry: clinical outcomes and phenotype stability</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>7</issue>
          <fpage>e102987</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0102987"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0102987</pub-id>
          <pub-id pub-id-type="medline">25058007</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-36591</pub-id>
          <pub-id pub-id-type="pmcid">PMC4109965</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Uh</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Bae</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>CS</given-names>
            </name>
          </person-group>
          <article-title>Clinical characteristics of exacerbation-prone adult asthmatics identified by cluster analysis</article-title>
          <source>Allergy Asthma Immunol Res</source>
          <year>2017</year>
          <month>11</month>
          <volume>9</volume>
          <issue>6</issue>
          <fpage>483</fpage>
          <lpage>90</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://e-aair.org/DOIx.php?id=10.4168/aair.2017.9.6.483"/>
          </comment>
          <pub-id pub-id-type="doi">10.4168/aair.2017.9.6.483</pub-id>
          <pub-id pub-id-type="medline">28913987</pub-id>
          <pub-id pub-id-type="pii">9.483</pub-id>
          <pub-id pub-id-type="pmcid">PMC5603476</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sendín-Hernández</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Ávila-Zarza</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sanz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>García-Sánchez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marcos-Vadillo</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Muñoz-Bellido</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Laffond</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Domingo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Isidoro-García</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dávila</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Cluster analysis identifies 3 phenotypes within allergic asthma</article-title>
          <source>J Allergy Clin Immunol Pract</source>
          <year>2018</year>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>955</fpage>
          <lpage>61.e1</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaip.2017.10.006</pub-id>
          <pub-id pub-id-type="medline">29133218</pub-id>
          <pub-id pub-id-type="pii">S2213-2198(17)30787-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Hierarchical grouping to optimize an objective function</article-title>
          <source>J Am Stat Assoc</source>
          <year>1963</year>
          <month>03</month>
          <volume>58</volume>
          <issue>301</issue>
          <fpage>236</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1080/01621459.1963.10500845</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buttrey</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Whitaker</surname>
              <given-names>LR</given-names>
            </name>
          </person-group>
          <article-title>treeClust: an R package for tree-based clustering dissimilarities</article-title>
          <source>R J</source>
          <year>2015</year>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>227</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journal.r-project.org/archive/2015/RJ-2015-032/RJ-2015-032.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.32614/rj-2015-032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nuss</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rothe</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Siebenhüner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Akdis</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Menz</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Differential serum protein markers and the clinical severity of asthma</article-title>
          <source>J Asthma Allergy</source>
          <year>2014</year>
          <volume>7</volume>
          <fpage>67</fpage>
          <lpage>75</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.2147/JAA.S53920"/>
          </comment>
          <pub-id pub-id-type="doi">10.2147/JAA.S53920</pub-id>
          <pub-id pub-id-type="medline">24851055</pub-id>
          <pub-id pub-id-type="pii">jaa-7-067</pub-id>
          <pub-id pub-id-type="pmcid">PMC4008293</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ramakrishnan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Livny</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>BIRCH: a new data clustering algorithm and its applications</article-title>
          <source>Data Min Knowl Discov</source>
          <year>1997</year>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>141</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1009783824328</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bacher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wenzig</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Vogler</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>SPSS TwoStep Cluster - a first evaluation</article-title>
          <source>Soc Sci Open Access Repos</source>
          <year>2004</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ssoar.info/ssoar/bitstream/handle/document/32715/ssoar-2004-bacher_et_al-SPSS_TwoStep_Cluster_-_a.pdf?sequence=1&#38;isAllowed=y&#38;lnkname=ssoar-2004-bacher_et_al-SPSS_TwoStep_Cluster_-_a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruggieri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Drago</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Longo</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Colombo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Balzan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bilocca</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zammit</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Montefort</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scaccianoce</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cuttitta</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Viegi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cibella</surname>
              <given-names>F</given-names>
            </name>
            <collab>RESPIRA Project Group</collab>
          </person-group>
          <article-title>Sensitization to dust mite defines different phenotypes of asthma: a multicenter study</article-title>
          <source>Pediatr Allergy Immunol</source>
          <year>2017</year>
          <month>11</month>
          <volume>28</volume>
          <issue>7</issue>
          <fpage>675</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.1111/pai.12768</pub-id>
          <pub-id pub-id-type="medline">28783215</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Differences in airway remodeling and airway inflammation among moderate-severe asthma clinical phenotypes</article-title>
          <source>J Thorac Dis</source>
          <year>2017</year>
          <month>09</month>
          <volume>9</volume>
          <issue>9</issue>
          <fpage>2904</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.21037/jtd.2017.08.01"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/jtd.2017.08.01</pub-id>
          <pub-id pub-id-type="medline">29221262</pub-id>
          <pub-id pub-id-type="pii">jtd-09-09-2904</pub-id>
          <pub-id pub-id-type="pmcid">PMC5708482</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rousseeuw</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rousseeuw</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Partitioning Around Medoids (Program PAM)</article-title>
          <source>Finding Groups in Data: An Introduction to Cluster Analysis</source>
          <year>2005</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &#38; Sons</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sekiya</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nakatani</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fukutomi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kaneda</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Iikura</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshida</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Takahashi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tomii</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nishikawa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaneko</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sugino</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shinkai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ueda</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tanikawa</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shirai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hirabayashi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aoki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kato</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Iizuka</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Homma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Taniguchi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Severe or life-threatening asthma exacerbation: patient heterogeneity identified by cluster analysis</article-title>
          <source>Clin Exp Allergy</source>
          <year>2016</year>
          <month>08</month>
          <volume>46</volume>
          <issue>8</issue>
          <fpage>1043</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1111/cea.12738</pub-id>
          <pub-id pub-id-type="medline">27041475</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loza</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Djukanovic</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>KF</given-names>
            </name>
            <name name-style="western">
              <surname>Horowitz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Branigan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Barnathan</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Susulic</surname>
              <given-names>VS</given-names>
            </name>
            <name name-style="western">
              <surname>Silkoff</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Sterk</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Baribaud</surname>
              <given-names>F</given-names>
            </name>
            <collab>ADEPT (Airways Disease Endotyping for Personalized Therapeutics), U-BIOPRED (Unbiased Biomarkers for the Prediction of Respiratory Disease Outcome Consortium) investigators</collab>
          </person-group>
          <article-title>Validated and longitudinally stable asthma phenotypes based on cluster analysis of the ADEPT study</article-title>
          <source>Respir Res</source>
          <year>2016</year>
          <month>12</month>
          <day>15</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>165</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://respiratory-research.biomedcentral.com/articles/10.1186/s12931-016-0482-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12931-016-0482-9</pub-id>
          <pub-id pub-id-type="medline">27978840</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12931-016-0482-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC5159977</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dhillon</surname>
              <given-names>IS</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kulis</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Kernel K-Means: Spectral Clustering and Normalized Cuts</article-title>
          <source>Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining</source>
          <year>2004</year>
          <conf-name>KDD'04</conf-name>
          <conf-date>August 22-25, 2004</conf-date>
          <conf-loc>Seattle, WA, USA</conf-loc>
          <fpage>551</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1145/1014052.1014118</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bleecker</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Castro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Denlinger</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Erzurum</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Fahy</surname>
              <given-names>JV</given-names>
            </name>
            <name name-style="western">
              <surname>Fitzpatrick</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Gaston</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Israel</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Jarjour</surname>
              <given-names>NN</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Mauger</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Meyers</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Phipatanakul</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sorkness</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Wenzel</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>Multiview cluster analysis identifies variable corticosteroid response phenotypes in severe asthma</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2019</year>
          <month>06</month>
          <day>1</day>
          <volume>199</volume>
          <issue>11</issue>
          <fpage>1358</fpage>
          <lpage>67</lpage>
          <pub-id pub-id-type="doi">10.1164/rccm.201808-1543OC</pub-id>
          <pub-id pub-id-type="medline">30682261</pub-id>
          <pub-id pub-id-type="pmcid">PMC6543720</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Holm</surname>
              <given-names>CT</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Cohn</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nezgovorova</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Meyers</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Bleecker</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Crisafi</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Jarjour</surname>
              <given-names>NN</given-names>
            </name>
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Reibman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chupp</surname>
              <given-names>GL</given-names>
            </name>
            <collab>SARP Investigators</collab>
          </person-group>
          <article-title>Characterisation of asthma subgroups associated with circulating YKL-40 levels</article-title>
          <source>Eur Respir J</source>
          <year>2017</year>
          <month>10</month>
          <volume>50</volume>
          <issue>4</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://erj.ersjournals.com/cgi/pmidlookup?view=long&#38;pmid=29025889"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/13993003.00800-2017</pub-id>
          <pub-id pub-id-type="medline">29025889</pub-id>
          <pub-id pub-id-type="pii">50/4/1700800</pub-id>
          <pub-id pub-id-type="pmcid">PMC5967238</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amelink</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>de Nijs</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>de Groot</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>van Tilburg</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>van Spiegel</surname>
              <given-names>PI</given-names>
            </name>
            <name name-style="western">
              <surname>Krouwels</surname>
              <given-names>FH</given-names>
            </name>
            <name name-style="western">
              <surname>Lutter</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zwinderman</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Weersink</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>ten Brinke</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sterk</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bel</surname>
              <given-names>EH</given-names>
            </name>
          </person-group>
          <article-title>Three phenotypes of adult-onset asthma</article-title>
          <source>Allergy</source>
          <year>2013</year>
          <volume>68</volume>
          <issue>5</issue>
          <fpage>674</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1111/all.12136</pub-id>
          <pub-id pub-id-type="medline">23590217</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benton</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Foerster</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Teach</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Freishtat</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Overcoming heterogeneity in pediatric asthma: tobacco smoke and asthma characteristics within phenotypic clusters in an African American cohort</article-title>
          <source>J Asthma</source>
          <year>2010</year>
          <month>09</month>
          <volume>47</volume>
          <issue>7</issue>
          <fpage>728</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20684733"/>
          </comment>
          <pub-id pub-id-type="doi">10.3109/02770903.2010.491142</pub-id>
          <pub-id pub-id-type="medline">20684733</pub-id>
          <pub-id pub-id-type="pmcid">PMC3325290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lemiere</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sava</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>D'Alpaos</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Huaux</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Vandenplas</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Occupational asthma phenotypes identified by increased fractional exhaled nitric oxide after exposure to causal agents</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2014</year>
          <month>11</month>
          <volume>134</volume>
          <issue>5</issue>
          <fpage>1063</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2014.08.017</pub-id>
          <pub-id pub-id-type="medline">25262466</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(14)01189-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garge</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Sprague</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Gorman</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Allison</surname>
              <given-names>DB</given-names>
            </name>
          </person-group>
          <article-title>Reproducible clusters from microarray research: whither?</article-title>
          <source>BMC Bioinformatics</source>
          <year>2005</year>
          <month>07</month>
          <day>15</day>
          <volume>6</volume>
          <issue>Suppl 2</issue>
          <fpage>S10</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-6-S2-S10"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-6-S2-S10</pub-id>
          <pub-id pub-id-type="medline">16026595</pub-id>
          <pub-id pub-id-type="pii">1471-2105-6-S2-S10</pub-id>
          <pub-id pub-id-type="pmcid">PMC1637038</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bair</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised methods to predict patient survival from gene expression data</article-title>
          <source>PLoS Biol</source>
          <year>2004</year>
          <month>04</month>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>E108</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pbio.0020108"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pbio.0020108</pub-id>
          <pub-id pub-id-type="medline">15094809</pub-id>
          <pub-id pub-id-type="pmcid">PMC387275</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bair</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised clustering methods</article-title>
          <source>Wiley Interdiscip Rev Comput Stat</source>
          <year>2013</year>
          <volume>5</volume>
          <issue>5</issue>
          <fpage>349</fpage>
          <lpage>61</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24729830"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/wics.1270</pub-id>
          <pub-id pub-id-type="medline">24729830</pub-id>
          <pub-id pub-id-type="pmcid">PMC3979639</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Just</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gouvis-Echraghi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Couderc</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Guillemot-Lambert</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Saint-Pierre</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Novel severe wheezy young children phenotypes: boys atopic multiple-trigger and girls nonatopic uncontrolled wheeze</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2012</year>
          <month>07</month>
          <volume>130</volume>
          <issue>1</issue>
          <fpage>103</fpage>
          <lpage>10.e8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2012.02.041</pub-id>
          <pub-id pub-id-type="medline">22502798</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(12)00401-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Just</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saint-Pierre</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gouvis-Echraghi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Boutin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Panayotopoulos</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chebahi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ousidhoum-Zidi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khau</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Wheeze phenotypes in young children have different courses during the preschool period</article-title>
          <source>Ann Allergy Asthma Immunol</source>
          <year>2013</year>
          <month>10</month>
          <volume>111</volume>
          <issue>4</issue>
          <fpage>256</fpage>
          <lpage>61.e1</lpage>
          <pub-id pub-id-type="doi">10.1016/j.anai.2013.07.002</pub-id>
          <pub-id pub-id-type="medline">24054360</pub-id>
          <pub-id pub-id-type="pii">S1081-1206(13)00481-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Just</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saint-Pierre</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gouvis-Echraghi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Laoudi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Roufai</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Momas</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Annesi Maesano</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Childhood allergic asthma is not a single phenotype</article-title>
          <source>J Pediatr</source>
          <year>2014</year>
          <month>04</month>
          <volume>164</volume>
          <issue>4</issue>
          <fpage>815</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jpeds.2013.11.037</pub-id>
          <pub-id pub-id-type="medline">24412137</pub-id>
          <pub-id pub-id-type="pii">S0022-3476(13)01472-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zoratti</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Krouse</surname>
              <given-names>RZ</given-names>
            </name>
            <name name-style="western">
              <surname>Babineau</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Pongracic</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>GT</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Khurana Hershey</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Kercsmar</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Gruchalla</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Kattan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Teach</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sigelman</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Gergen</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Togias</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Visness</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Busse</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>AH</given-names>
            </name>
          </person-group>
          <article-title>Asthma phenotypes in inner-city children</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2016</year>
          <month>10</month>
          <volume>138</volume>
          <issue>4</issue>
          <fpage>1016</fpage>
          <lpage>29</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27720016"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2016.06.061</pub-id>
          <pub-id pub-id-type="medline">27720016</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(16)30885-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5104222</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rousseeuw</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rousseeuw</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Fuzzy Analysis (Program FANNY)</article-title>
          <source>Finding Groups in Data: An Introduction to Cluster Analysis</source>
          <year>2005</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &#38; Sons</publisher-name>
          <fpage>164</fpage>
          <lpage>98</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Labor</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Labor</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jurić</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Fijačko</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Grle</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Plavec</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Mood disorders in adult asthma phenotypes</article-title>
          <source>J Asthma</source>
          <year>2018</year>
          <month>01</month>
          <volume>55</volume>
          <issue>1</issue>
          <fpage>57</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1080/02770903.2017.1306546</pub-id>
          <pub-id pub-id-type="medline">28489959</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obara</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ishikuro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tamiya</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ueki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yamanaka</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mizuno</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kikuya</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Metoki</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Matsubara</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nagai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kobayashi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kamiyama</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Watanabe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kakuta</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ouchi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kurihara</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fukuchi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Yasuhara</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Inagaki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaga</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kure</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kuriyama</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Potential identification of vitamin B6 responsiveness in autism spectrum disorder utilizing phenotype variables and machine learning methods</article-title>
          <source>Sci Rep</source>
          <year>2018</year>
          <month>10</month>
          <day>4</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>14840</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-018-33110-w"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-018-33110-w</pub-id>
          <pub-id pub-id-type="medline">30287864</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-018-33110-w</pub-id>
          <pub-id pub-id-type="pmcid">PMC6172273</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Rumble</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Plante</surname>
              <given-names>DT</given-names>
            </name>
          </person-group>
          <article-title>Identifying subtypes of Hypersomnolence Disorder: a clustering analysis</article-title>
          <source>Sleep Med</source>
          <year>2019</year>
          <month>12</month>
          <volume>64</volume>
          <fpage>71</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/j.sleep.2019.06.015</pub-id>
          <pub-id pub-id-type="medline">31670163</pub-id>
          <pub-id pub-id-type="pii">S1389-9457(19)30231-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolters</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Moonen</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lopes</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Leentjens</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Duits</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Defebvre</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Delmaire</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hofman</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>van Bussel</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Dujardin</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Grey matter abnormalities are associated only with severe cognitive decline in early stages of Parkinson's disease</article-title>
          <source>Cortex</source>
          <year>2020</year>
          <month>02</month>
          <volume>123</volume>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cortex.2019.09.015</pub-id>
          <pub-id pub-id-type="medline">31733342</pub-id>
          <pub-id pub-id-type="pii">S0010-9452(19)30337-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pikoula</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Quint</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Nissen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hemingway</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Smeeth</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Denaxas</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Identifying clinically important COPD sub-types using data-driven approaches in primary care population based electronic health records</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>04</month>
          <day>18</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>86</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0805-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0805-0</pub-id>
          <pub-id pub-id-type="medline">30999919</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0805-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6472089</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Robust multiple kernel k-means clustering using min-max optimization</article-title>
          <source>arXiv preprints</source>
          <year>2018</year>
          <comment>preprint; arXiv:1803.02458<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1803.02458"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>On Spectral Clustering: Analysis and an Algorithm</article-title>
          <source>Proceedings of the 14th International Conference on Neural Information Processing Systems: Natural and Synthetic</source>
          <year>2001</year>
          <conf-name>NIPS'01</conf-name>
          <conf-date>December 3-8, 2001</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <fpage>849</fpage>
          <lpage>56</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/2980539.2980649"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
