<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v12i1e59858</article-id>
      <article-id pub-id-type="pmid">39270211</article-id>
      <article-id pub-id-type="doi">10.2196/59858</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Early Diagnosis of Hereditary Angioedema in Japan Based on a US Medical Dataset: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Takemura</surname>
            <given-names>Tadamasa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhu</surname>
            <given-names>Rongfei</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wu</surname>
            <given-names>Jianqiang</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Yamashita</surname>
            <given-names>Kouhei</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Hematology and Oncology</institution>
            <institution>Graduate School of Medicine</institution>
            <institution>Kyoto University</institution>
            <addr-line>54 Shogoin-kawahara-cho, Sakyo-ku</addr-line>
            <addr-line>Kyoto, 606-8507</addr-line>
            <country>Japan</country>
            <phone>81 75 751 4964</phone>
            <fax>81 75 751 4963</fax>
            <email>kouhei@kuhp.kyoto-u.ac.jp</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4247-3407</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Nomoto</surname>
            <given-names>Yuji</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-9123-1121</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Hirose</surname>
            <given-names>Tomoya</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5959-4569</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Yutani</surname>
            <given-names>Akira</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1430-8693</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Okada</surname>
            <given-names>Akira</given-names>
          </name>
          <degrees>ME</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-2166-2398</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Watanabe</surname>
            <given-names>Nayu</given-names>
          </name>
          <degrees>MMG</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-7404-0728</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Suzuki</surname>
            <given-names>Ken</given-names>
          </name>
          <degrees>ME</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-9783-4573</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Senzaki</surname>
            <given-names>Munenori</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-2140-8256</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Kuroda</surname>
            <given-names>Tomohiro</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1472-7203</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Hematology and Oncology</institution>
        <institution>Graduate School of Medicine</institution>
        <institution>Kyoto University</institution>
        <addr-line>Kyoto</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Palliative Care Medicine</institution>
        <institution>Niigata City General Hospital</institution>
        <addr-line>Niigata</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Traumatology and Acute Critical Medicine</institution>
        <institution>Graduate School of Medicine</institution>
        <institution>Osaka University</institution>
        <addr-line>Osaka</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Division of Medical Information Technology and Administration Planning</institution>
        <institution>Kyoto University Hospital</institution>
        <addr-line>Kyoto</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Healthcare and Life Science, IBM Consulting</institution>
        <institution>IBM Japan, Ltd</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Kouhei Yamashita <email>kouhei@kuhp.kyoto-u.ac.jp</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>13</day>
        <month>9</month>
        <year>2024</year>
      </pub-date>
      <volume>12</volume>
      <elocation-id>e59858</elocation-id>
      <history>
        <date date-type="received">
          <day>26</day>
          <month>4</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>25</day>
          <month>5</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>13</day>
          <month>6</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>6</day>
          <month>8</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Kouhei Yamashita, Yuji Nomoto, Tomoya Hirose, Akira Yutani, Akira Okada, Nayu Watanabe, Ken Suzuki, Munenori Senzaki, Tomohiro Kuroda. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 13.09.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2024/1/e59858" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Hereditary angioedema (HAE), a rare genetic disease, induces acute attacks of swelling in various regions of the body. Its prevalence is estimated to be 1 in 50,000 people, with no reported bias among different ethnic groups. However, considering the estimated prevalence, the number of patients in Japan diagnosed with HAE remains approximately 1 in 250,000, which means that only 20% of potential HAE cases are identified.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to develop an artificial intelligence (AI) model that can detect patients with suspected HAE using medical history data (medical claims, prescriptions, and electronic medical records [EMRs]) in the United States. We also aimed to validate the detection performance of the model for HAE cases using the Japanese dataset.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The HAE patient and control groups were identified using the US claims and EMR datasets. We analyzed the characteristics of the diagnostic history of patients with HAE and developed an AI model to predict the probability of HAE based on a generalized linear model and bootstrap method. The model was then applied to the EMR data of the Kyoto University Hospital to verify its applicability to the Japanese dataset.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Precision and sensitivity were measured to validate the model performance. Using the comprehensive US dataset, the precision score was 2% in the initial model development step. Our model can thus flag suspected patients, of whom 1 in 50 has HAE. In addition, in the validation step with Japanese EMR data, the precision score was 23.6%, which exceeded our expectations. We achieved a sensitivity score of 61.5% for the US dataset and 37.6% for the validation exercise using data from a single Japanese hospital. Overall, our model could predict patients with typical HAE symptoms.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study indicates that our AI model can detect HAE in patients with typical symptoms and is effective in Japanese data. However, further prospective clinical studies are required to investigate whether this model can be used to diagnose HAE.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>machine learning</kwd>
        <kwd>screening</kwd>
        <kwd>AI</kwd>
        <kwd>prediction</kwd>
        <kwd>rare diseases</kwd>
        <kwd>HAE</kwd>
        <kwd>electronic medical record</kwd>
        <kwd>real world data</kwd>
        <kwd>big data</kwd>
        <kwd>angioedema</kwd>
        <kwd>edema</kwd>
        <kwd>ML</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>algorithm</kwd>
        <kwd>algorithms</kwd>
        <kwd>predictive model</kwd>
        <kwd>predictive models</kwd>
        <kwd>predictive analytics</kwd>
        <kwd>predictive system</kwd>
        <kwd>practical model</kwd>
        <kwd>practical models</kwd>
        <kwd>early warning</kwd>
        <kwd>early detection</kwd>
        <kwd>real world data</kwd>
        <kwd>RWD</kwd>
        <kwd>Electronic health record</kwd>
        <kwd>EHR</kwd>
        <kwd>electronic health records</kwd>
        <kwd>EHRs</kwd>
        <kwd>EMR</kwd>
        <kwd>electronic medical records</kwd>
        <kwd>EMRs</kwd>
        <kwd>patient record</kwd>
        <kwd>patient records</kwd>
        <kwd>health record</kwd>
        <kwd>health records</kwd>
        <kwd>personal health record</kwd>
        <kwd>PHR</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The rare genetic disease hereditary angioedema (HAE) induces acute attacks of swelling in various regions of the body, including the face, hands, arms, legs, abdomen, genitals, buttocks, and throat. Gastrointestinal disturbances such as abdominal pain, nausea, and vomiting are frequently associated with edema. Laryngeal edema is rare, even though more than half of the patients with HAE encounter this life-threatening condition [<xref ref-type="bibr" rid="ref1">1</xref>]. The global prevalence of HAE is estimated to be 1 in 50,000 people, with no reported bias among different ethnic groups [<xref ref-type="bibr" rid="ref2">2</xref>]. In Japan, about 1 in 250,000 people are diagnosed with HAE, which suggests that only 20% of potential HAE cases are identified [<xref ref-type="bibr" rid="ref3">3</xref>] and that many patients with HAE remain undiagnosed. Furthermore, in Japan, the mean duration from the first symptoms to diagnosis is 15.6 years [<xref ref-type="bibr" rid="ref4">4</xref>], which is longer than that in Europe and the United States [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Early detection of undiagnosed patients is critical for effective treatment of HAE.</p>
      <p>To overcome this situation in Japan, the Diagnostic Consortium to Advance the Ecosystem for Hereditary Angioedema (DISCOVERY) was established in 2021 [<xref ref-type="bibr" rid="ref7">7</xref>]; it aimed to identify patients with undiagnosed HAE and provide them with appropriate treatment as early as possible.</p>
      <p>In this study, we aimed to develop an artificial intelligence (AI) model that can detect patients with suspected HAE using medical history data (claims and electronic medical records [EMRs]) in the United States. We then sought to validate the model’s performance in detecting HAE cases. In addition, we conducted a pilot study at Kyoto University Hospital (KUHP) using the EMR data to verify the model’s applicability to medical data obtained from the Japanese population. The main objective of this study was to verify whether this model could identify patients with a history of HAE or related diseases.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>First, we developed an AI model using medical history data from the United States as a reference. Thereafter, we applied the model to medical history data from Japan and verified its efficacy using a Japanese dataset. Note that we used a large dataset of patients from the United States as input for the model, considering that HAE is a rare disease.</p>
      </sec>
      <sec>
        <title>Initial Model Development with US Dataset</title>
        <sec>
          <title>Data Selection</title>
          <p>The Merative MarketScan Explorys Claims-EMR Data Set (formerly IBM Watson Health) [<xref ref-type="bibr" rid="ref8">8</xref>] was used to obtain patient-level linked claims and EMR data for US patients. The diagnoses and prescription histories of patients with edema or digestive symptoms from January 2012 to January 2021 were identified from the dataset and were used to build our model. Data from a total of 4,283,815 patients were used in the study.</p>
          <p>To identify the diagnosis history of patients, the <italic>International Classification of Diseases</italic> (<italic>ICD</italic>) [<xref ref-type="bibr" rid="ref9">9</xref>] code (ninth and 10th editions) available in this dataset was used. However, the <italic>ICD</italic> code for HAE (D84.1) represents “defects in the complement system,” which is also applicable to other similar diseases. Therefore, we used the prescription history of drugs administered only for HAE (<xref ref-type="table" rid="table1">Table 1</xref>) to distinguish patients with HAE. We categorized the patients with a prescription history of these drugs as the “HAE group,” representing patients presumed to have HAE.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>US Food and Drug Administration–approved medications used only for hereditary angioedema (as of January 2022).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="350"/>
              <col width="400"/>
              <col width="250"/>
              <thead>
                <tr valign="bottom">
                  <td>Proprietary name</td>
                  <td>Nonproprietary name</td>
                  <td>Product NDC<sup>a</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>BERINERT</td>
                  <td>Human C1-esterase inhibitor</td>
                  <td>63833-825</td>
                </tr>
                <tr valign="top">
                  <td>CINRYZE</td>
                  <td>Human C1-esterase inhibitor</td>
                  <td>42227-081<break/>42227-083</td>
                </tr>
                <tr valign="top">
                  <td>FIRAZYR</td>
                  <td>Icatibant acetate</td>
                  <td>54092-702</td>
                </tr>
                <tr valign="top">
                  <td>HAEGARDA</td>
                  <td>Human C1-esterase inhibitor</td>
                  <td>63833-828<break/>63833-829</td>
                </tr>
                <tr valign="top">
                  <td>KALBITOR</td>
                  <td>Ecallantide</td>
                  <td>47783-101</td>
                </tr>
                <tr valign="top">
                  <td>ORLADEYO</td>
                  <td>Berotralstat hydrochloride</td>
                  <td>72769-101<break/>72769-102</td>
                </tr>
                <tr valign="top">
                  <td>RUCONEST</td>
                  <td>C1 esterase inhibitor recombinant</td>
                  <td>70383-350<break/>69913-350<break/>71274-350</td>
                </tr>
                <tr valign="top">
                  <td>TAKHZYRO</td>
                  <td>lanadelumab-flyo</td>
                  <td>47783-644</td>
                </tr>
                <tr valign="top">
                  <td>Icatibant (Generic)</td>
                  <td>Icatibant acetate or Icatibant</td>
                  <td>0093-3066<break/>24201-207<break/>60505-6214<break/>63323-574<break/>68462-828<break/>69097-664<break/>71225-114</td>
                </tr>
                <tr valign="top">
                  <td>SAJAZIR</td>
                  <td>Icatibant</td>
                  <td>70709-013</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>NDC: National Drug Code.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <p>To maintain the demographic characteristics of the original data, the control group was randomly sampled from 1% of the remaining patients, with a fixed ratio of age groups and male-to-female ratio (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Note that this was crucial to reduce the data volume to operate the model using limited computation resources (2 central processing units and 16 GB of memory). This was done considering the potential use of the model in various medical institutions in the future.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Comparison of the distribution of the 1% sampled data set with that of the population. dobyr: date of birth year.</p>
            </caption>
            <graphic xlink:href="medinform_v12i1e59858_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Finally, 3 groups were included for model development and validation (<xref rid="figure2" ref-type="fig">Figure 2</xref>): the HAE group with 179 patients, the D84.1 group (including individuals who likely have HAE but do not have a prescription history of HAE-specific treatments) with 1521 patients, and the control group with 42,839 patients.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Flowchart depicting the different patient groups created using the US data set. HAE: hereditary angioedema.</p>
            </caption>
            <graphic xlink:href="medinform_v12i1e59858_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>To develop the model, the <italic>ICD</italic> code was used to create features that described the diagnostic history of patients. As this dataset contained both <italic>ICD-9</italic> and <italic>ICD-10</italic> codes throughout the data period, we standardized the 2 <italic>ICD</italic> types. We assigned codes representing the same disease items from both <italic>ICD-9</italic> and <italic>ICD-10</italic> codes under a single ID.</p>
        </sec>
        <sec>
          <title>Model Development</title>
          <sec>
            <title>Feature Selection</title>
            <p>We counted the number of types of <italic>ICD</italic> codes diagnosed in both the HAE and D84.1 groups, as these 2 groups should have similar features. Furthermore, the differences in <italic>ICD</italic> code types between the groups were required to create a model that can identify patients with HAE. We examined the rank correlation between the 2 groups and found it to be approximately 0.08, which suggested that the 2 groups had different characteristics. We then examined specific <italic>ICD</italic> codes that were significantly ranked differently between the 2 groups and identified 25 such <italic>ICD</italic> codes, which were then used as the primary features in developing the model.</p>
            <p>We also examined <italic>ICD</italic> codes that were diagnosed several times over a period of 1 year. This is important as patients with HAE tend to have repeated occurrences of swelling in various regions of the body [<xref ref-type="bibr" rid="ref1">1</xref>], which can lead to the diagnosis of stomachaches and edemas. We counted the number of patients who had been diagnosed with stomachaches or edemas between 2 and 4 times per year and found a substantial difference between both groups. Considering that the medical record entry may overlap multiple times when changing the record types, we removed duplicates based on the date and <italic>ICD</italic> code for each patient. Thereafter, we labeled a group of <italic>ICD</italic> codes related to abdominal pain or edemas and counted the number of times they were assigned in a 1-year span window for each patient based on this dataset. From this exploratory analysis, we included instances where individuals experienced 4 or more incidences of stomachaches and 3 or more incidences of edema per year as part of the main features of our model. The table of the explanatory variables is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </sec>
          <sec>
            <title>Model Building</title>
            <p>The number of patients in the HAE group was extremely small compared with that in the control + D84.1 groups; thus, to avoid overfitting, we used bootstrap sampling [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>] to create the model. A generalized linear model [<xref ref-type="bibr" rid="ref12">12</xref>] with regularization terms [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] was adopted. We used <inline-graphic xlink:href="medinform_v12i1e59858_fig7.png" xlink:type="simple" mimetype="image"/> for the link function to create a logistic model that would indicate the likelihood of the patient belonging to the HAE group. We chose logistic regression for evaluation as it allows for regression with regularization and is relatively easy to use for evaluating and interpreting feature importance by checking the coefficients. The partial regression coefficients were estimated by the maximum likelihood method, which estimates parameters (known as maximum likelihood estimates) that maximize the likelihood of the given observed values. The regularization parameter λ was set to 1 to ensure that it was Lasso regularization.</p>
            <p>We used 25% of the data from the HAE group and another 25% from the control + D84.1 groups to train the model, which was then used to predict the remaining 75% of each group. This modeling process was performed 20 times with different random seeds. The average predicted value was calculated as the final output for all the patients. In each trial, the sample used as training data did not have a predicted value and was excluded from the average value calculation (<xref rid="figure3" ref-type="fig">Figure 3</xref>). Upon applying the regularization using Lasso regression, the number of substantial features was sorted out during each calculation by mathematically adjusting the coefficients of some variables to 0. The number of sorted features varied with an average of 10; notably, different features were selected every time.</p>
            <fig id="figure3" position="float">
              <label>Figure 3</label>
              <caption>
                <p>Training data extraction and prediction calculation of the constructed model. HAE: hereditary angioedema.</p>
              </caption>
              <graphic xlink:href="medinform_v12i1e59858_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Evaluation Method and Threshold Setting</title>
        <p>After obtaining the final value for each participant, we performed a Welch <italic>t</italic> test on the 2 distributions to confirm that the 2 groups had different means. Subsequently, we defined the threshold value that yielded the most balanced classification accuracy using the receiver operating characteristic (ROC) curve. ROC curves help visualize the entire scenario of trade-offs between sensitivity and precision across a set of cutoff points. The volumes of the HAE and control + D84.1 groups were not equal; therefore, it was important to check the balance between sensitivity and precision rather than the accuracy itself.</p>
      </sec>
      <sec>
        <title>Model Application to Japanese EMR Data</title>
        <sec>
          <title>Data Extraction and Model Application</title>
          <p>For the validation step using Japanese data, data were extracted from a data warehouse (DWH), which collects medical data from the EMR of the KUHP. Patient IDs in the DWH are pseudonymized. The medical data were obtained for a total of 702,213 patients, among which 22 had a history of HAE, 47 had a confirmed diagnosis of HAE, and 123 had a suspected diagnosis of HAE. The data for model validation included those associated with patients from all these groups (patients using drugs for HAE, patients with confirmed HAE, and patients suspected to have HAE). This was done because physicians may have suspected HAE for some patients if their symptoms were similar to those of patients with the condition. Therefore, these 3 types of patients were considered as the patients with HAE in the study (HAE group; <xref rid="figure4" ref-type="fig">Figure 4</xref>).</p>
          <p>To adapt the model to Japanese data, we used the standard disease name codes widely used in Japan, as defined by the Medical Information System Development Center (MEDIS-DC) [<xref ref-type="bibr" rid="ref15">15</xref>], instead of the <italic>ICD</italic> code. Although the <italic>ICD</italic> code is the basic classification code for diagnosis, the standard disease name codes have more subdivisions compared with the <italic>ICD</italic> code, and hence, they can provide a more precise clinical diagnosis. We converted the <italic>ICD</italic> codes using the standard disease name code master for <italic>ICD-10</italic> [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
          <p>Patient data extracted from the DWH were transferred to the Google Cloud Platform server (a virtual private cloud environment) hosted at KUHP. The AI model and statistical programs were stored in a container and sent to the server. We then accessed the server through a virtual private network, which could only be accessed by the authors of this study. The model was applied to all patient data on this server.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Flowchart depicting the different patient groups obtained from the KUHP data set. HAE: hereditary angioedema.</p>
            </caption>
            <graphic xlink:href="medinform_v12i1e59858_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was approved by the Ethics Committee of the Kyoto University Hospital (approval number R3750). In this study, we used pseudonymized information that had already been processed; thus, individual informed consent was not required. The pseudonymized medical data are made available for academic research in accordance with KUHP’s privacy policy. Information regarding each study is publicly disclosed on the institution’s website, where patients are informed of their right to opt out along with the opt-out procedure.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Evaluation of the Initial Model</title>
        <p>The Welch <italic>t</italic> test indicated that the 2 patient groups did not have the same mean values, as suggested by the <italic>P</italic> value of 2.2e-16. Furthermore, the area under the ROC curve was 86.4%, which was obtained when only the HAE group was set as true and all the other groups as false. The best accuracy threshold of this ROC curve was calculated as 39%, with an accuracy of 99.6%. This is because the volume of the control + D84.1 group was larger than that of the HAE group. The true-positive rate (sensitivity) at this threshold was only 10.6%, with a precision of 54.3%.</p>
        <p>As we aimed to identify patients likely to have HAE, we searched for a different threshold that could improve the sensitivity while keeping the precision at an acceptable level. Considering the fact that the prediction of the HAE group had <inline-graphic xlink:href="medinform_v12i1e59858_fig8.png" xlink:type="simple" mimetype="image"/> and <inline-graphic xlink:href="medinform_v12i1e59858_fig9.png" xlink:type="simple" mimetype="image"/>, 0.075-0.125 could be a good threshold candidate. We confirmed the sensitivity and precision for the thresholds of 0.075, 0.1, and 0.125 to determine the most balanced threshold, as shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Cross-tabulation was calculated at 3 different threshold values using all data groups for a detailed evaluation of different scaled precisions and group sensitivities.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="80"/>
            <col width="80"/>
            <col width="0"/>
            <col width="80"/>
            <col width="0"/>
            <col width="80"/>
            <col width="0"/>
            <col width="90"/>
            <col width="0"/>
            <col width="80"/>
            <col width="80"/>
            <col width="90"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="3">Control group</td>
                <td colspan="2">D84.1 group</td>
                <td colspan="2">D84.1 and HAE<sup>a</sup></td>
                <td colspan="2">HAE group (not D84.1)</td>
                <td colspan="4">Score (%)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>100% scale converted</td>
                <td>1% scale</td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">1% scale precision</td>
                <td>100% scale precision</td>
                <td>Sensitivity 1<sup>b</sup></td>
                <td>Sensitivity 2<sup>c</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="15">
                  <bold>Suspection statistic (threshold=0.1)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not suspected, n</td>
                <td>4,279,500</td>
                <td>42,795</td>
                <td colspan="2">1312</td>
                <td colspan="2">55</td>
                <td colspan="2">30</td>
                <td colspan="2">27.1</td>
                <td>2.0</td>
                <td>61.5</td>
                <td>52.5</td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>Suspected, n</td>
                <td>4400</td>
                <td>44</td>
                <td colspan="2">209</td>
                <td colspan="2">88</td>
                <td colspan="2">6</td>
                <td colspan="2">—<sup>d</sup></td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td colspan="15">
                  <bold>Suspection statistic (threshold=0.075)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not suspected, n</td>
                <td>4,277,900</td>
                <td>42,779</td>
                <td colspan="2">1266</td>
                <td colspan="2">52</td>
                <td colspan="2">28</td>
                <td colspan="2">23.9</td>
                <td>1.6</td>
                <td>63.6</td>
                <td>55.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Suspected, n</td>
                <td>6000</td>
                <td>60</td>
                <td colspan="2">255</td>
                <td colspan="2">91</td>
                <td colspan="2">8</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td colspan="15">
                  <bold>Suspection statistic (threshold=0.125)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not suspected, n</td>
                <td>4,280,600</td>
                <td>42,806</td>
                <td colspan="2">1343</td>
                <td colspan="2">62</td>
                <td colspan="2">33</td>
                <td colspan="2">28.5</td>
                <td>2.4</td>
                <td>56.6</td>
                <td>46.9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Suspected, n</td>
                <td>3300</td>
                <td>33</td>
                <td colspan="2">178</td>
                <td colspan="2">81</td>
                <td colspan="2">3</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>HAE: hereditary angioedema.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Excluding “not D84.1” patients.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>Including “not D84.1” patients.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The threshold value of 0.1 had a sensitivity of 52.5% and precision of 27.1%, indicating that 1 out of 2 known HAE group participants can be correctly detected, and 1 out of 4 detected participants should correctly belong to the HAE group. If we exclude the HAE group participants who were not diagnosed with D84.1, the sensitivity was 61.5%. This result was calculated based on 1% of the sample size of the original control group; thus, by multiplying the number of all participants from the control group by 100, we obtained a 100% scale precision of 2%. This was 2 times better than the 1% precision goal set at the beginning of the study. This means that based on this model, 1 out of 50 suspected patients is highly likely to have HAE. Considering that HAE prevalence is estimated to be 1 in 50,000 people, we can expect to find undiagnosed patients with HAE quickly and efficiently using this model output.</p>
        <p>From a conservative standpoint, the threshold value of 0.1 seems optimal. However, to identify more potential patients with HAE, it might be better to apply the 0.075 threshold, which has a sensitivity of 55.3% and a precision of 23.9%. If we recalculate the 100% scale precision in the same manner as described above, we obtain 1.6%. This means we can still achieve our goal of 1% precision while improving the sensitivity.</p>
        <p>In addition, we need to consider the fact that the ratio of suspected patients in the US dataset can be calculated to be approximately 0.09% with a 0.1 threshold and 0.15% with a threshold of 0.075. If this model is to be used on a much smaller volume dataset compared with the US dataset, there is an approximately 2 times higher risk of obtaining zero suspected patients with a 0.1 threshold than with the 0.075 threshold.</p>
      </sec>
      <sec>
        <title>Application of the Model to Japanese EMR Data</title>
        <p>To verify the performance of this model using Japanese data, it was applied to patient data obtained from KUHP, and the output of potential patients with HAE was obtained based on the selected threshold. The diagnostic histories of these patients were stored at a single university hospital. Compared with the dataset used to build the original model, the variation and coverage of the entire diagnostic history were assumed to be relatively low. Therefore, we adopted a threshold value of 0.075 in this validation study to aggressively identify patients with HAE. We considered the HAE group (<xref rid="figure4" ref-type="fig">Figure 4</xref>) as the correct data for this validation.</p>
        <p>As shown in <xref ref-type="table" rid="table3">Table 3</xref>, 65 of 173 patients with HAE were detected using this model, indicating a sensitivity of 37.6%.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Cross-tabulation with precision and sensitivity scores of Kyoto University Hospital results.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="140"/>
            <col width="130"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td colspan="2"/>
                <td>Control group</td>
                <td colspan="3">HAE<sup>a</sup> group</td>
                <td colspan="2">Score (threshold=0.075)</td>
              </tr>
              <tr valign="top">
                <td colspan="2"/>
                <td/>
                <td>Prescribed</td>
                <td>Prescribed and<break/>diagnosed</td>
                <td>Diagnosed</td>
                <td>Precision (%)</td>
                <td>Sensitivity (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Suspection statistic (threshold=0.075)</bold>
                </td>
                <td>3.2</td>
                <td>31.8</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not suspected</td>
                <td>701,829</td>
                <td>2</td>
                <td>13</td>
                <td>93</td>
                <td/>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Suspected</td>
                <td>211</td>
                <td>1</td>
                <td>6</td>
                <td>58</td>
                <td/>
                <td/>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>HAE: hereditary angioedema.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Some patients in the HAE group did not have a diagnostic history specific to HAE (eg, abdominal pain, swelling, or edema) within the KUHP data. Their common symptoms might have been treated by their primary doctors or clinics and not at this university hospital. Furthermore, because HAE is a hereditary disorder, some patients may have been diagnosed through family testing. These factors appear to lead to a lower sensitivity score for the Japanese dataset than that for the US data.</p>
        <p>The precision score was 23.6%, which is more than 14 times higher than that of the initial model. As mentioned in the Introduction section, only 20% of patients with HAE in Japan are diagnosed, which means that 80% of patients with HAE are undiagnosed. Therefore, the 211 patients from the control group who were suspected by our model to have HAE may be undiagnosed patients with HAE.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we developed an AI model for screening patients with HAE and validated its performance using 2 methods.</p>
        <p>First, a large patient dataset was selected to build a model containing patient-level linked claims and EMR data from the United States. The advantage of this dataset is that it contains a long-term prescription and diagnostic history across multiple medical institutes. The diagnostic characteristics of patients with HAE were determined by analyzing the dataset. Based on these characteristics, we constructed a generalized linear model with regularization terms. At a threshold of 0.1, the sensitivity score was 52.5% and the precision score was 27.1% if patients with possible HAE were included in the correct answer group. When these were excluded from the correct answers, the sensitivity score was 61.5%.</p>
        <p>We then applied this model to Japanese EMR data. This validation was conducted at a single university hospital using DWH data. Generally, patients often visit local hospitals and rarely visit university hospitals if they present with common symptoms. Given this situation, data obtained from a single university hospital may limit the performance of the model. Although the sensitivity score (37.6%) was lower than that of the US dataset, the precision score reached 23.6% with a threshold value of 0.075. This implies that our model has a high possibility of identifying patients with undiagnosed HAE in Japan.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study had several limitations. Generally, because HAE is a rare disease, patient group data (correct answer data in machine learning) are quite small. In addition, the variance in each patient’s features was larger than that in common diseases. The specific limitations and possible countermeasures are described below.</p>
        <sec>
          <title>Family History</title>
          <p>In our basic analysis of the HAE group, we found that some patients in the HAE group had a less extensive diagnostic history than others. We suspected that these patients had been diagnosed with HAE based on their family histories. Because our model uses the diagnostic history to calculate the probability, these cases are potentially difficult to detect.</p>
        </sec>
        <sec>
          <title>US Patient Data Consists of Data From Multiple Hospitals</title>
          <p>Our model may rely on the fact that US patient data consists of data from multiple hospitals. Collecting data from multiple hospitals will allow tracking of the records of a single patient across these hospitals and provide a more detailed medical history. For validation in the Japanese dataset, we could only use data from a single university hospital, which may be one of the reasons for the low sensitivity.</p>
        </sec>
        <sec>
          <title>Potential Patients With HAE Might Be Included in the Control + D84.1 Groups</title>
          <p>Since the HAE diagnosis rate was low, it is likely that there were more patients with HAE in the control + D84.1 groups. In our approach, we assigned the HAE group a prescription history of HAE drugs to keep the model conservative.</p>
        </sec>
        <sec>
          <title>Possible Difference in Diagnostic Tendency Between the United States and Japan</title>
          <p>If there are differences in how doctors make diagnoses between countries, we may need to customize the model or threshold to adapt it to Japan and other countries.</p>
        </sec>
      </sec>
      <sec>
        <title>Comparison With Previous Work</title>
        <p>Few previous studies have focused on screening patients for rare diseases based on diagnostic histories such as medical claims. Nonetheless, some studies have focused on a few rare diseases. For example, a previous study used AI models based on diagnostic history to identify patients with Pompe disease [<xref ref-type="bibr" rid="ref17">17</xref>]. In that study, 104 patients were flagged by the model to have the disease, but only 19 were determined by specialists to have a high likelihood of having Pompe disease, rendering a precision score of 18.27% [<xref ref-type="bibr" rid="ref17">17</xref>]. In comparison, our model recorded a precision of 23.6%. Screening for rare diseases is extremely difficult compared with other common diseases, for which abundant data exist; however, our results indicate that AI models can show high performance for screening rare diseases.</p>
      </sec>
      <sec>
        <title>Conclusions and Future Directions</title>
        <p>Considering the prevalence of HAE (1/50,000), the screening performance of this model was 1,000 times greater than that achieved through random searching using US data. Owing to the low prevalence and recognition rates of rare diseases, identifying undiagnosed patients with these diseases is an arduous task. This study suggests that patient screening for HAE may become significantly more efficient if this AI model is used. This approach is particularly valuable for the diagnosis and treatment of rare diseases.</p>
        <p>In addition, during the validation phase using the Japanese data, the model was effective at a single university hospital. Although only the diagnosis codes recorded in the EMR were available, the model could detect patients with typical symptoms of HAE. The performance of the model can likely be improved further if this model is applied to the data from city hospitals or medical claims, which contain diagnostic histories of patients in multiple medical institutions. This can provide more comprehensive information on the symptoms and diagnostic histories of each patient.</p>
        <p>In this study, only patients with a diagnostic history of HAE within the dataset were defined as correct answers. Given the low diagnosis rate, these data may include patients with undetected HAE. The model performance cannot be strictly calculated in such situations. Therefore, further studies are needed to determine whether patients with undiagnosed HAE should be included in the predicted group. Because identifying undiagnosed patients with HAE is a critical issue, especially in Japan, we will implement a prospective clinical study using our AI model.</p>
        <p>The constructed model may help researchers, physicians, and other health care professionals identify undiagnosed HAE cases. Eventually, if this strategy can identify undiagnosed patients and provide them with proper treatment, their quality of life will likely be improved.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Table of the explanatory variables.</p>
        <media xlink:href="medinform_v12i1e59858_app1.png" xlink:title="PNG File , 147 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DISCOVERY</term>
          <def>
            <p>Diagnostic Consortium to Advance the Ecosystem for Hereditary Angioedema</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">DWH</term>
          <def>
            <p>data warehouse</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HAE</term>
          <def>
            <p>hereditary angioedema</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">KUHP</term>
          <def>
            <p>Kyoto University Hospital</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MEDIS-DC</term>
          <def>
            <p>Medical Information System Development Center</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We acknowledge the support received from DISCOVERY (Diagnostic Consortium to Advance the Ecosystem for Hereditary Angioedema), which covered the publication costs of the study.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bork</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Staubach</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Hereditary angioedema: new findings concerning symptoms, affected organs, and course</article-title>
          <source>Am J Med</source>
          <year>2006</year>
          <volume>119</volume>
          <issue>3</issue>
          <fpage>267</fpage>
          <lpage>274</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amjmed.2005.09.064</pub-id>
          <pub-id pub-id-type="medline">16490473</pub-id>
          <pub-id pub-id-type="pii">S0002-9343(05)01081-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zuraw</surname>
              <given-names>BL</given-names>
            </name>
          </person-group>
          <article-title>Clinical practice. Hereditary angioedema</article-title>
          <source>N Engl J Med</source>
          <year>2008</year>
          <volume>359</volume>
          <issue>10</issue>
          <fpage>1027</fpage>
          <lpage>1036</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMcp0803977</pub-id>
          <pub-id pub-id-type="medline">18768946</pub-id>
          <pub-id pub-id-type="pii">359/10/1027</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ohsawa</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <source>Nanbyo Iden-sei kekkann-sei fushu HAE (An intractable disease: Hereditary Angioedema (HAE))</source>
          <year>2016</year>
          <publisher-loc>Osaka</publisher-loc>
          <publisher-name>Iyaku Jānarusha</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Iwamoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yamamoto</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ohsawa</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Honda</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Horiuchi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fukunaga</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Maehara</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yamashita</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Akita</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hide</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The diagnosis and treatment of hereditary angioedema patients in Japan: a patient reported outcome survey</article-title>
          <source>Allergol Int</source>
          <year>2021</year>
          <volume>70</volume>
          <issue>2</issue>
          <fpage>235</fpage>
          <lpage>243</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/346771059_The_diagnosis_and_treatment_of_hereditary_angioedema_patients_in_Japan_A_patient_reported_outcome_survey"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.alit.2020.09.008</pub-id>
          <pub-id pub-id-type="medline">33168485</pub-id>
          <pub-id pub-id-type="pii">S1323-8930(20)30135-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bellanti</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Settipane</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>The Floralia: a festive time for Romans and a demanding time for the allergist/immunologist</article-title>
          <source>Allergy Asthma Proc</source>
          <year>2018</year>
          <volume>39</volume>
          <issue>3</issue>
          <fpage>167</fpage>
          <lpage>168</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29669662"/>
          </comment>
          <pub-id pub-id-type="doi">10.2500/aap.2018.39.4141</pub-id>
          <pub-id pub-id-type="medline">29669662</pub-id>
          <pub-id pub-id-type="pmcid">PMC5911509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zanichelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Magerl</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Longhurst</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fabien</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Maurer</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Hereditary angioedema with C1 inhibitor deficiency: delay in diagnosis in Europe</article-title>
          <source>Allergy Asthma Clin Immunol</source>
          <year>2013</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aacijournal.biomedcentral.com/articles/10.1186/1710-1492-9-29"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1710-1492-9-29</pub-id>
          <pub-id pub-id-type="medline">23937903</pub-id>
          <pub-id pub-id-type="pii">1710-1492-9-29</pub-id>
          <pub-id pub-id-type="pmcid">PMC3751114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <source>DISCOVERY (Diagnostic Consortium to Advance the Ecosystem for Hereditary Angioedema)</source>
          <access-date>2024-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://discovery0208.or.jp/en/top/">https://discovery0208.or.jp/en/top/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Merative</collab>
          </person-group>
          <source>Real-world evidence solutions for life sciences</source>
          <access-date>2024-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.merative.com/content/dam/merative/documents/brief/real-world-evidence-solution-brief.pdf">https://www.merative.com/content/dam/merative/documents/brief/real-world-evidence-solution-brief.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>WHO</collab>
          </person-group>
          <source>International Statistical Classification of Diseases and Related Health Problems (ICD)</source>
          <access-date>2024-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/standards/classifications/classification-of-diseases">https://www.who.int/standards/classifications/classification-of-diseases</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Efron</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Bootstrap methods: another look at the jackknife</article-title>
          <source>Ann Stat</source>
          <year>1979</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://sites.stat.washington.edu/courses/stat527/s14/readings/ann_stat1979.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1214/aos/1176344552</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Efron</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <source>An Introduction to the Bootstrap</source>
          <year>1993</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Chapman &#38; Hall</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCullagh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nelder</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <source>Generalized Linear Models. 2nd edition</source>
          <year>1989</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>Chapman &#38; Hall/CRC</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Regression shrinkage and selection via the Lasso</article-title>
          <source>J R Stat Soc Series B (Methodological)</source>
          <year>1996</year>
          <volume>58</volume>
          <issue>1</issue>
          <fpage>267</fpage>
          <lpage>288</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://webdoc.agsci.colostate.edu/koontz/arec-econ535/papers/Tibshirani%20(JRSS-B%201996).pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hoerl</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Kennard</surname>
              <given-names>RW</given-names>
            </name>
          </person-group>
          <article-title>Ridge regression: biased estimation for nonorthogonal problems</article-title>
          <source>Technometrics</source>
          <year>1970</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>55</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://homepages.math.uic.edu/~lreyzin/papers/ridge.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <source>Medical Information System Development Center (MEDIS-DC)</source>
          <access-date>2024-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.medis.or.jp/">http://www.medis.or.jp/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>MEDIS-DC</collab>
          </person-group>
          <source>Standard disease name master for ICD-10</source>
          <access-date>2024-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www2.medis.or.jp/stdcd/byomei/byomei.html">http://www2.medis.or.jp/stdcd/byomei/byomei.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nateqi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Weingartner-Ortner</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gruarin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marling</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pilgram</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Lagler</surname>
              <given-names>FB</given-names>
            </name>
            <name name-style="western">
              <surname>Aigner</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>AG</given-names>
            </name>
          </person-group>
          <article-title>An artificial intelligence-based approach for identifying rare disease patients using retrospective electronic health records applied for Pompe disease</article-title>
          <source>Front Neurol</source>
          <year>2023</year>
          <volume>14</volume>
          <fpage>1108222</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37153672"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fneur.2023.1108222</pub-id>
          <pub-id pub-id-type="medline">37153672</pub-id>
          <pub-id pub-id-type="pmcid">PMC10160659</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
