<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i8e37842</article-id>
      <article-id pub-id-type="pmid">35969459</article-id>
      <article-id pub-id-type="doi">10.2196/37842</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Identifying Patients Who Meet Criteria for Genetic Testing of Hereditary Cancers Based on Structured and Unstructured Family Health History Data in the Electronic Health Record: Natural Language Processing Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Fu</surname>
            <given-names>Sunyang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Xia</surname>
            <given-names>Jingbo</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Han</surname>
            <given-names>Peijin</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Shi</surname>
            <given-names>Jianlin</given-names>
          </name>
          <degrees>MS, MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2950-8038</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Morgan</surname>
            <given-names>Keaton L</given-names>
          </name>
          <degrees>MS, MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9140-4454</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Bradshaw</surname>
            <given-names>Richard L</given-names>
          </name>
          <degrees>MS, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7363-0327</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Jung</surname>
            <given-names>Se-Hee</given-names>
          </name>
          <degrees>BSN</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8149-0993</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Kohlmann</surname>
            <given-names>Wendy</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9134-9640</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Kaphingst</surname>
            <given-names>Kimberly A</given-names>
          </name>
          <degrees>SCD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <xref rid="aff8" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2668-9080</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Kawamoto</surname>
            <given-names>Kensaku</given-names>
          </name>
          <degrees>MPH, MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4282-9338</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Del Fiol</surname>
            <given-names>Guilherme</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>University of Utah</institution>
            <addr-line>421 Wakara Way</addr-line>
            <addr-line>Ste 140</addr-line>
            <addr-line>Salt Lake City, UT, 84108-3514</addr-line>
            <country>United States</country>
            <fax>1 801 581 4297</fax>
            <phone>1 801 581 4080</phone>
            <email>guilherme.delfiol@utah.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9954-6799</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Veterans Affairs Informatics and Computing Infrastructure</institution>
        <institution>Department of Veterans Affairs Salt Lake City Health Care System</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Division of Epidemiology, Department of Internal Medicine</institution>
        <institution>School of Medicine</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Emergency Medicine</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>College of Nursing</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Population Health Sciences</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Huntsman Cancer Institute</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Department of Communication</institution>
        <institution>University of Utah</institution>
        <addr-line>Salt Lake City, UT</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Guilherme Del Fiol <email>guilherme.delfiol@utah.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>11</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>8</issue>
      <elocation-id>e37842</elocation-id>
      <history>
        <date date-type="received">
          <day>9</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>4</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>6</day>
          <month>7</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Jianlin Shi, Keaton L Morgan, Richard L Bradshaw, Se-Hee Jung, Wendy Kohlmann, Kimberly A Kaphingst, Kensaku Kawamoto, Guilherme Del Fiol. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 11.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/8/e37842" xlink:type="simple"/>
      <related-article related-article-type="correction-forward" xlink:title="This is a corrected version. See correction statement in:" xlink:href="https://medinform.jmir.org/2022/9/e42533" vol="10" page="e42533"> </related-article>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Family health history has been recognized as an essential factor for cancer risk assessment and is an integral part of many cancer screening guidelines, including genetic testing for personalized clinical management strategies. However, manually identifying eligible candidates for genetic testing is labor intensive.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to develop a natural language processing (NLP) pipeline and assess its contribution to identifying patients who meet genetic testing criteria for hereditary cancers based on family health history data in the electronic health record (EHR). We compared an algorithm that uses structured data alone with structured data augmented using NLP.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Algorithms were developed based on the National Comprehensive Cancer Network (NCCN) guidelines for genetic testing for hereditary breast, ovarian, pancreatic, and colorectal cancers. The NLP-augmented algorithm uses both structured family health history data and the associated unstructured free-text comments. The algorithms were compared with a reference standard of 100 patients with a family health history in the EHR.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Regarding identifying the reference standard patients meeting the NCCN criteria, the NLP-augmented algorithm compared with the structured data algorithm yielded a significantly higher recall of 0.95 (95% CI 0.9-0.99) versus 0.29 (95% CI 0.19-0.40) and a precision of 0.99 (95% CI 0.96-1.00) versus 0.81 (95% CI 0.65-0.95). On the whole data set, the NLP-augmented algorithm extracted 33.6% more entities, resulting in 53.8% more patients meeting the NCCN criteria.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Compared with the structured data algorithm, the NLP-augmented algorithm based on both structured and unstructured family health history data in the EHR increased the number of patients identified as meeting the NCCN criteria for genetic testing for hereditary breast or ovarian and colorectal cancers.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>clinical natural language processing</kwd>
        <kwd>family health history extraction</kwd>
        <kwd>cohort identification</kwd>
        <kwd>genetic testing of hereditary cancers</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Cancer screening has been shown to effectively reduce mortality [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Unlike population-based screening recommendations that target a broad range of individuals, increasing evidence supports individualized cancer screening according to cancer risk [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Individuals at higher risk may benefit from earlier, more frequent, or more intensive screening. Effective interventions are needed to stratify patients by risk and to direct them to an appropriate level of screening. However, individualizing screening on a population scale requires patient-specific risk assessments for several types of cancer. This is quite challenging in today’s overwhelmed primary care environment, as the current screening process requires manual chart review to identify patient candidates for genetic testing, and primary care providers often do not have time or knowledge to discuss genetic testing with their patients. A promising solution is to automate the identification of high-risk patients using electronic health records (EHRs) coupled with clinical decision support (CDS) tools.</p>
        <p>The National Comprehensive Cancer Network (NCCN) has published a set of evidence-based guidelines for genetic testing of hereditary cancers, including breast, ovarian, pancreatic, and colorectal cancers [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. A summary of these 2 guidelines is provided in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>, where each list item represents a criterion, and the criteria for the same cancer cohort are grouped under the same heading. When one or more criteria are met, the corresponding genetic testing is recommended. These cancer risk assessment guidelines are based mainly on the family health history (FHH) of cancer or cancer syndromes, which is recorded in EHR systems as part of routine patient care activities. Therefore, EHR is one of the most important sources of FHH that can be used to drive CDS tools to help identify candidates for genetic testing of hereditary cancers [<xref ref-type="bibr" rid="ref8">8</xref>]. However, several challenges limit the systematic use of FHH in EHR for these purposes, including (1) scattered FHH documentation in both structured and unstructured formats across different EHR sections, such as the clinical note [<xref ref-type="bibr" rid="ref9">9</xref>], problem list, and FHH sections; (2) conflicting documentation in different sections of the EHR; (3) incomplete documentation in structured FHH data; and (4) negation and ambiguity of information in unstructured data [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref12">12</xref>].</p>
        <boxed-text id="box1" position="float">
      <title>Excerpt of National Comprehensive Cancer Network (NCCN) criteria for unaffected individuals’ family history–based genetic testing of breast, ovarian, pancreatic, and colorectal cancers (referenced with permission).</title>
      <p>
        <bold>Breast or ovarian cancer:</bold>
      </p>
      <list list-type="order">
        <list-item>
          <p>First- or second-degree relative with breast cancer at age ≤45 years</p>
        </list-item>
        <list-item>
          <p>First- or second-degree relative with ovarian cancer</p>
        </list-item>
        <list-item>
          <p>First-degree relative with pancreatic cancer</p>
        </list-item>
        <list-item>
          <p>Breast cancer in a male relative</p>
        </list-item>
        <list-item>
          <p>Three or more first- or second-degree relatives with breast or prostate cancer on the same side of the family</p>
        </list-item>
        <list-item>
          <p>Ashkenazi Jewish and any breast or prostate cancer in any relative at any age</p>
        </list-item>
        <list-item>
          <p>BRCA1/2, CHEK2, ATM, PALB2, TP53, PTEN, or CDH1 genes, Cowden Syndrome, Li-Fraumeni Syndrome in any relative at any age</p>
        </list-item>
      </list>
      <p>
        <bold>Colorectal cancer:</bold>
      </p>
      <list list-type="order">
        <list-item>
          <p>MLH1, MSH2, PMS2, MSH6, EPCAM, MYH, or MUTYH genes, Lynch syndrome, familial adenomatous polyposis (FAP), adenomatous polyposis coli (APC), serrated polyposis or polyposis discovered in the coded family history</p>
        </list-item>
        <list-item>
          <p>First-degree relative with colon cancer at ≤50 years</p>
        </list-item>
        <list-item>
          <p>First-degree relative with endometrial cancer at ≤50 years</p>
        </list-item>
        <list-item>
          <p>Three or more first- or second-degree relatives with Lynch syndrome, HNPCC, or colon, endometrial, uterine, ovarian, stomach, gastric, small bowel, small intestine, kidney, ureteral, bladder, urethral, brain, or pancreatic cancer, all on the same side of the family</p>
        </list-item>
      </list>
    </boxed-text>
        <p>Genetic testing for breast, ovarian, or colorectal cancer is recommended if at least one of these criteria is met.</p>
        <p>Current EHR systems often provide a dedicated FHH section, in which FHH assertions can be captured using a combination of structured (eg, coded disease, relationship, and age of onset) and unstructured data (ie, the comments field). FHH free-text comments are different from broader clinical notes in that the former are associated with a specific structured FHH assertion, only available in the FHH section, while clinical notes can capture a much wider range of information, including medical history, physical examination, and treatment plans. Health care providers typically use free-text FHH comment fields when desired information cannot be fully captured as structured data. For example, a patient’s sister who developed breast cancer in her 30s can be captured partially as structured data (ie, condition = <italic>breast cancer</italic> and family member = <italic>sister</italic>) supplemented by a comment captured in the unstructured data conveying the uncertain age of onset (ie, <italic>onset in her 30s</italic>). The FHH section is increasingly used as part of routine visit intake by medical assistants and by patients themselves through patient portals [<xref ref-type="bibr" rid="ref13">13</xref>]. Therefore, the FHH section is a promising and underused source of FHH for EHR.</p>
        <p>Previous studies have largely focused on extracting FHH from clinical notes [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. This study is the first comprehensive attempt to supplement structured FHH data with information extracted from free-text comments. The natural language processing (NLP) extraction of information from free-text comments imposes a unique set of challenges that require specific approaches that have not been investigated. Specifically, candidate approaches must address the interplay between structured and unstructured data collected in the FHH section.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>Our previously developed structured algorithm [<xref ref-type="bibr" rid="ref8">8</xref>] for identifying patients who met the NCCN criteria for genetic testing using structured data demonstrated the potential use of this dedicated FHH section. Nonetheless, we noticed that the algorithm based on structured data failed to correctly identify certain cases because some information needed for eligibility determination was recorded as free-text comments. For example, an FHH entry included <italic>CANCER</italic> and <italic>AUNT</italic> as structured data, with the specific type of cancer and age of onset (<italic>breast ca, dx in 30s</italic>) provided as a free-text comment. This case would be considered eligible for genetic testing when using the information provided in the comments section. These errors resulting from the structured data algorithm added a manual review burden for genetic counseling staff because they needed to manually confirm patient eligibility before communicating with them.</p>
        <p>Hence, this study aims to augment CDS algorithms that rely exclusively on structured FHH data with information extracted from free-text FHH comments fields using NLP, with a focus on identifying patients who meet the NCCN criteria for genetic testing for hereditary breast or ovarian and colorectal cancers. The corresponding NLP was designed to extract the FHH information when it was not available or accurately coded in structured data, including the cancer type (eg, pancreatic cancer), the age of onset (eg, in the early 30s), and the affected family member (eg, <italic>paternal aunt</italic>). The primary hypothesis is that using NLP to augment the previously developed algorithm (using structured data alone) [<xref ref-type="bibr" rid="ref8">8</xref>] can improve the accuracy of identifying patients who meet the NCCN criteria for genetic testing based on the FHH of patients seen in primary care settings at a US academic medical center.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>We retrospectively studied data from the EHR at the University of Utah Health. The study consisted of 2 stages (<xref rid="figure1" ref-type="fig">Figure 1</xref>). In the first stage, for NLP development, an NLP solution was developed to extract FHH information from both structured and unstructured data in the FHH section of EHR, and its performance was evaluated in comparison with gold standard annotation results. Next, we developed an NLP-augmented algorithm on top of the structured data algorithm (using only structured data) [<xref ref-type="bibr" rid="ref8">8</xref>] to match the NCCN criteria using the NLP-processed results from both structured and unstructured fields. In the second stage, the performance of the NLP-augmented algorithm was compared with that of the structured data algorithm.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study stages, including natural language processing (NLP) development (stage 1) and comparison between the NLP-augmented algorithm and an algorithm using only structured data (stage 2). EDW: enterprise data warehouse; FHH: family health history.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e37842_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Sets</title>
        <p>The data set for NLP development and evaluation consisted of EHR-based data from the FHH section (including both structured and unstructured fields) for 77,423 patients aged between 25 and 60 years who visited the University of Utah Health primary care clinic at least once between May 1, 2018, and April 30, 2019. All FHH entries of these patients were obtained, including entries recorded in prior visits dating back to June 26, 2014. Each FHH entry contained a coded condition (eg, breast cancer), a coded relative (eg, sister), an integer age of onset, and a free-text comment that clinicians used to add detail (eg, <italic>in her 30s</italic>). Entries that were not used to determine familial cancer risk were filtered using Structured Query Language (SQL), resulting in 31,191 entries. The detailed filtering strategy is illustrated in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Data set creation process. FHH: family health history. NCCN: National Comprehensive Cancer Network. NLP: natural language processing. *HNPCC: hereditary non-polyposis colorectal cancer. FAP: familial adenomatous polyposis. Other genetic mutations or cancer syndromes specified in the NCCN guideline but without a code in electronic health record (EHR) were not included.</p>
          </caption>
          <graphic xlink:href="medinform_v10i8e37842_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The data set was split into 2 subsets. The FHH entries that were entered before June 26, 2018, were used for NLP development and evaluation (ie, the NLP development and evaluation data set), while entries entered after that date were used for algorithm evaluation (ie, the NCCN algorithm evaluation data set). We obtained a stratified random sample of 2300 FHH entries from the NLP development and evaluation data set. The stratification was based on the diagnosis codes in the condition field and stratified into four groups: (1) breast or ovarian cancer, (2) colorectal cancer, (3) other cancers, and (4) other noncancer family histories, at a 1:1:2:2 ratio. We randomly split 1300 FHH entries for NLP development, and the remaining 1000 entries were used for the snippet-level NLP evaluation. The NCCN algorithm evaluation data set was used to compare the performance of the 2 algorithms. Then, all the FHH entries (both data sets) were used to estimate the amount of additional information extracted by NLP and compare the patients identified by the NLP-augmented algorithm with those identified by the structured data algorithm.</p>
      </sec>
      <sec>
        <title>NLP Approach</title>
        <sec>
          <title>Overview</title>
          <p>Although NLP is often only used to process free-text data, independent of structured data, the comments field in the FHH section of EHR is used to supplement the structured data and cannot be interpreted in isolation. For example, in <xref ref-type="table" rid="table1">Table 1</xref>, the word <italic>breast</italic> supplements the concept <italic>CANCER</italic> in the structured condition field. Therefore, we concatenated the structured and comments fields into a single string for NLP processing. We also used double curly brackets to mark the values from the structured fields to reconcile conflicting information between the structured and comments fields (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>An example of combining structured and unstructured data from FHH<sup>a</sup> assertions.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="170"/>
              <col width="170"/>
              <col width="320"/>
              <col width="170"/>
              <col width="170"/>
              <thead>
                <tr valign="top">
                  <td>Field names</td>
                  <td>Condition</td>
                  <td>Comments<sup>b</sup></td>
                  <td>Family member</td>
                  <td>Age of onset</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Original data</td>
                  <td>CANCER</td>
                  <td>Breast, great-aunt, dx at age of 52</td>
                  <td>AUNT</td>
                  <td>NULL</td>
                </tr>
                <tr valign="top">
                  <td>Combined</td>
                  <td>{{CANCER}}</td>
                  <td>Breast, great-aunt, dx at age of 52</td>
                  <td>{{AUNT}}</td>
                  <td>{{}}</td>
                </tr>
                <tr valign="top">
                  <td>Annotations</td>
                  <td colspan="4">
                    <graphic xlink:href="medinform_v10i8e37842_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                  </td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>FHH: family health history.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>In this case, the comments field supplements or corrects the structured data, that is, CANCER is of the <italic>breast</italic>, and the family member (AUNT) is actually the patient’s great-aunt. FX_CANCER (FC): family member to cancer relationship; FX_ONSET: family member to age of onset relationship.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>FHH Annotation Schema</title>
          <p>A total of 2 physicians designed the annotation schema based on the FHH attributes relevant to the NCCN guidelines for genetic testing of hereditary breast or ovarian and colorectal cancers. This schema encompasses conditions, family members, and the age of onset. Specifically, the snippet-level data set contains (1) annotated entities for cancer diagnosis (CANCER), cancer-related syndromes (SYNDROME), cancer-related genetic mutations (GENE_MUT), family members (FAMILYMEMBER), and age of onset (ONSET), and (2) relations between family members and conditions, as well as between family members and age of onset. The example provided in <xref ref-type="table" rid="table1">Table 1</xref> has 3 entities, that is, <italic>great-aunt</italic> (FAMILYMEMBER), <italic>{{CANCER}} breast</italic> (BREAST−breast cancer), <italic>52</italic> (ONSET_AGE), and 2 relations, that is, <italic>great-aunt</italic> → <italic>{{CANCER}} breast</italic> (FX_CANCER) and <italic>great-aunt</italic> → <italic>52</italic> (FX_ONSET). As the NCCN criteria include other cancers with mutations that share a common genetic pathway with breast, ovarian, and colorectal cancers, we added the following annotation subtypes: BLADDER, BREAST, BRAIN, COLON, KIDNEY, OVARIAN, PANCREAS, PROSTATE, RECTAL, STOMACH, SMALL_INTESTINE, URETERAL, and URETHRAL. As the NCCN criteria also use the side of the family of the affected family member and the degree of relationship, 2 attributes were included: family member CODE (eg, <italic>GRANDMOTHER</italic>) and SIDE of FAMILYMEMBER (eg, <italic>PATERNAL</italic>). 
In addition, an UNCERTAINTY feature was added to capture uncertainty statements (eg, <italic>probably ovarian cancer</italic>). We used a schema developed in our previous studies to annotate the age of onset [<xref ref-type="bibr" rid="ref10">10</xref>], which includes 4 subtypes: ONSET_AGE (eg, <italic>age 52</italic>), ONSET_RANGE (eg, <italic>in his 30s</italic>), ONSET_PERIOD (eg, <italic>in 1965</italic>), and ONSET_STRING (eg, <italic>postmenstruation</italic>). <xref rid="figure3" ref-type="fig">Figure 3</xref> presents a screenshot of the full schema within the annotation tool (Brat) [<xref ref-type="bibr" rid="ref16">16</xref>]. The schema configuration is shared on GitHub [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Screenshot of the schema as implemented with the annotation tool Brat.</p>
            </caption>
            <graphic xlink:href="medinform_v10i8e37842_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>NLP Development</title>
          <p>To develop the NLP pipeline, we used Easy clinical information extractor (EasyCIE), a lightweight rule-based NLP tool that supports rapid clinical NLP implementations [<xref ref-type="bibr" rid="ref18">18</xref>]. All NLP components of EasyCIE are configurable through rules without the need to develop new pipelines. A total of 1300 FHH entries were used to develop the rules. We adopted a logic similar to that described by Goryachev et al [<xref ref-type="bibr" rid="ref19">19</xref>] but implemented the logic in a different way for efficiency and generalizability considerations [<xref ref-type="bibr" rid="ref20">20</xref>]. The processing consists of three major steps: (1) entity extraction, (2) entity reconciliation, and (3) relation identification (<xref rid="figure4" ref-type="fig">Figure 4</xref>). Each step was performed using one or more NLP components. The following paragraph explains these components using the examples in <xref ref-type="table" rid="table1">Table 1</xref>. Each component is configured using a separate rule set that incorporates a keyword dictionary or inference logic. These rules were developed based on 3 sources: Unified Medical Language System, training data set, and clinical domain experts’ input. The rule set is available on GitHub [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Easy clinical information extractor processing workflow. Three major steps (blue boxes): (1) entity extraction—extract the entities from the family health history entries; (2) entity reconciliation—reconcile the conflicts between the extracted entities; (3) relation identification—link related entities. In each step, there are ≥1 natural language processing components to complete processing substeps.</p>
            </caption>
            <graphic xlink:href="medinform_v10i8e37842_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p><italic>Entity extraction</italic> (step 1) extracts the key entities (5 types) from the FHH entries. First, we split the entry into sentences if it contained more than one sentence. Second, we attempted to match the input string with controlled vocabulary (a keyword dictionary). In the example shown in <xref ref-type="table" rid="table1">Table 1</xref>, <italic>{{CANCER}}</italic> breast was recognized as <italic>BREAST</italic> (cancer), <italic>52</italic> as <italic>ONSET_AGE</italic>, and <italic>great aunt</italic> and <italic>AUNT</italic> as <italic>FAMILYMEMBER</italic>. Next, we detected the double curly brackets around <italic>AUNT</italic>. These 2 symbols indicate that the mention of <italic>AUNT</italic> was located in the structured field. Thus, we assigned the feature <italic>is_structural</italic> to <italic>AUNT</italic>. Finally, we verified the features of each entity to determine whether they matched any inference rules. In the example, a <italic>FAMILYMEMBER</italic> with the <italic>is_structural</italic> feature was classified as <italic>STR_FAMILYMEMBER</italic> (a family member in the structured field). This differentiation among entities in different contexts allows entity reconciliation in the next step. This component also allowed us to exclude irrelevant mentions of entities (eg, the <italic>FAMILYMEMBER daughter</italic> in the context of <italic>live with her daughter</italic>). Further details regarding the implementation of EasyCIE's rule-processing engine are available elsewhere [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
          <p><italic>Entity reconciliation</italic> (step 2) reconciles the extracted entities from the first step when conflicts exist between structured and unstructured data. The corresponding heuristic rules were iteratively developed based on annotated data from the training set with refinements based on error analysis after applying the algorithm to the training set. In addition, we obtained insights through discussions with clinical experts, who currently use the dedicated FHH section to document FHH. Specifically, the heuristic rules listed in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref> were applied (additional examples are listed in <xref ref-type="table" rid="table2">Table 2</xref>).</p>
          <boxed-text id="box2" position="float">
            <title>Heuristic rules.</title>
            <p>
              <bold>Rules</bold>
            </p>
            <list list-type="order">
              <list-item>
                <p>If the structured field indicated <italic>colon cancer</italic>, but the information in the comments field clarified the condition of interest to be colorectal cancer syndromes (eg, Lynch syndrome), SYNDROME overrode COLON (cancer) in the structured field.</p>
              </list-item>
              <list-item>
                <p>If the age of onset was documented as structured numeric data (eg, 50) but the comments field documented an <italic>ONSET_RANGE</italic> (eg, late 50s), the ONSET_RANGE overrode the structured age of onset.</p>
              </list-item>
              <list-item>
                <p>If the age of onset was available in structured data, and the comments field included <italic>ONSET_PERIOD</italic> (eg, in 1985) or <italic>ONSET_STRING</italic> (eg, 10 years ago), <italic>ONSET_PERIOD</italic> and <italic>ONSET_STRING</italic> were ignored.</p>
              </list-item>
              <list-item>
                <p>If no age of onset was documented in the structured field and the comments field included a <italic>DECEASED_AGE</italic>, the algorithm set an <italic>ONSET_RANGE before</italic> the <italic>DECEASED_AGE</italic>.</p>
              </list-item>
              <list-item>
                <p>If the comments field contained information on a specific family member, the algorithm ignored the structured family member field unless the comments field included a conjunction such as <italic>also</italic> or <italic>and</italic>. In the example in <xref ref-type="table" rid="table1">Table 1</xref>, the <italic>FAMILYMEMBER great aunt</italic> was likely a correction of the STR_FAMILYMEMBER <italic>AUNT</italic> because the picklist associated with STR_FAMILYMEMBER did not include an option for <italic>great aunt</italic>. Thus, in the reconciliation, STR_FAMILYMEMBER <italic>AUNT</italic> was ignored.</p>
              </list-item>
              <list-item>
                <p>If the comments field contained nonspecific family member information (eg, father’s side), whereas the structured field contained a specific family member, the structured field code was used, and information from the comments was added as attributes if applicable.</p>
              </list-item>
              <list-item>
                <p>If a mention of <italic>FAMILYMEMBER</italic> was specified as multiple individuals (eg, 2× sisters), multiple instances of <italic>FAMILYMEMBER</italic> were created (eg, 2× sisters would lead to 2 instances).</p>
              </list-item>
            </list>
          </boxed-text>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Heuristic rules to reconcile entities.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="170"/>
              <col width="140"/>
              <col width="260"/>
              <col width="160"/>
              <col width="270"/>
              <thead>
                <tr valign="top">
                  <td>Structured fields</td>
                  <td>Example</td>
                  <td>Comments field</td>
                  <td>Example</td>
                  <td>Reconciliation</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td><italic>COLON</italic><sup>a</sup> (cancer)</td>
                  <td>{{CANCER, COLON}}</td>
                  <td>Colorectal cancer–related <italic>SYNDROME</italic></td>
                  <td>Lynch syndrome</td>
                  <td>Chose <italic>SYNDROME</italic></td>
                </tr>
                <tr valign="top">
                  <td>
                    <italic>ONSET_AGE</italic>
                  </td>
                  <td>{{50}}</td>
                  <td>
                    <italic>ONSET_RANGE</italic>
                  </td>
                  <td>The late 50s</td>
                  <td>Chose <italic>ONSET_RANGE</italic></td>
                </tr>
                <tr valign="top">
                  <td>
                    <italic>ONSET_AGE</italic>
                  </td>
                  <td>{{50}}</td>
                  <td>
                    <italic>ONSET_PERIOD</italic>
                  </td>
                  <td>In 1985</td>
                  <td>Chose <italic>ONSET_AGE</italic></td>
                </tr>
                <tr valign="top">
                  <td>
                    <italic>ONSET_AGE</italic>
                  </td>
                  <td>{{50}}</td>
                  <td>
                    <italic>ONSET_STRING</italic>
                  </td>
                  <td>10 years ago</td>
                  <td>Chose <italic>ONSET_AGE</italic></td>
                </tr>
                <tr valign="top">
                  <td>NULL</td>
                  <td>{{}}</td>
                  <td>
                    <italic>DECEASED_AGE</italic>
                  </td>
                  <td>Deceased at age 60 years</td>
                  <td>Inferred the <italic>ONSET_RANGE</italic></td>
                </tr>
                <tr valign="top">
                  <td>
                    <italic>FAMILYMEMBER</italic>
                  </td>
                  <td>{{AUNT}}</td>
                  <td>A specific <italic>FAMILYMEMBER</italic></td>
                  <td>Great-aunt</td>
                  <td>Chose <italic>FAMILYMEMBER</italic> in comments</td>
                </tr>
                <tr valign="top">
                  <td>
                    <italic>FAMILYMEMBER</italic>
                  </td>
                  <td>{{MOTHER}}</td>
                  <td>A specific <italic>FAMILYMEMBER</italic> with conjunction statement</td>
                  <td>And grandmother</td>
                  <td>Use <italic>FAMILYMEMBER</italic> in both fields</td>
                </tr>
                <tr valign="top">
                  <td>
                    <italic>FAMILYMEMBER</italic>
                  </td>
                  <td>{{AUNT}}</td>
                  <td>Nonspecific</td>
                  <td>Father side</td>
                  <td>Chose <italic>FAMILYMEMBER</italic>, and added comments value as a feature, if applicable</td>
                </tr>
                <tr valign="top">
                  <td>NULL</td>
                  <td>{{}}</td>
                  <td>Multiple <italic>FAMILYMEMBER</italic></td>
                  <td>2× sisters</td>
                  <td>Created two <italic>FAMILYMEMBER</italic> annotations</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>Words in italics denote concepts in the NLP output according to the FHH annotation schema.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <p><italic>Relation identification</italic> (step 3) links related entities. In the example of <xref ref-type="table" rid="table1">Table 1</xref>, <italic>great aunt</italic> and <italic>{{CANCER}} breast</italic> were linked to create an <italic>FX_CANCER</italic> relation. It also linked <italic>great aunt</italic> and <italic>52</italic> to create an <italic>FX_ONSET</italic> relation. As <italic>STR_FAMILYMEMBER AUNT</italic> was changed to <italic>IGN_FAMILYMEMBER</italic>, <italic>AUNT</italic> in the structured field was not linked to <italic>{{CANCER}} breast</italic> or <italic>52</italic>. When counting the number of relatives of interest, the number of FAMILYMEMBER-CANCER relations was obtained instead of relative entities. For example, <italic>ovarian and stomach cancer</italic> in <italic>grandmother</italic> should be counted as 2 cancers in the NCCN criteria. Although the NLP algorithm generated one FAMILYMEMBER entity (grandmother), two FAMILYMEMBER-CANCER relations were generated. The same process is followed to handle cases where a single cancer assertion refers to multiple relatives, eg, <italic>breast cancer in mother and aunt</italic>.</p>
        </sec>
        <sec>
          <title>NLP Performance Evaluation</title>
          <p>We evaluated the NLP solution by comparing its output with the test set annotations of the snippet-level data set (1000 FHH entries). To save time and effort, entities with no relation were not annotated (eg, an entry that only has a condition without mentioning any family member); therefore, we did not evaluate the NLP performance for named entity recognition. Precision, recall, and F1 scores were calculated for relation identification. A true positive relation was counted when NLP-extracted information matched the reference standard for both the relation type and corresponding feature values, as well as the two linked entities. We applied the bootstrap sampling method [<xref ref-type="bibr" rid="ref22">22</xref>] to estimate the 95% CI for each performance measurement and conducted error analyses by categorizing and counting different types of errors. Considering the mentions of <italic>SYNDROME</italic> (cancer syndrome) and <italic>GENETIC_MUTATION</italic> (cancer-related genetic mutation) were very rare in the data set, the CI for the performance related to the extraction of relations with these 2 entity types, that is, <italic>FX_SYNDROME</italic> (family member to cancer syndrome relation) and <italic>FX_GENE_MUT</italic> (family member to genetic mutation relation), could not be obtained. Thus, we only calculated the CIs of the microaverages of these 3 measurements using bootstrap methods over the aggregated data that included all 4 relation types.</p>
        </sec>
        <sec>
          <title>Structured Data Algorithm for Patient Eligibility Assessment</title>
          <p>A rule-based algorithm was previously developed [<xref ref-type="bibr" rid="ref8">8</xref>] based on NCCN guidelines for the genetic testing of hereditary breast or ovarian and colorectal cancers [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] using only structured FHH data. The algorithm was implemented using an open-source CDS platform (OpenCDS [<xref ref-type="bibr" rid="ref23">23</xref>]) through a standards-based approach based on CDS Hooks for Services and the Fast Healthcare Interoperability Resources standard for FHH data representation. On the basis of the patient’s age and FHH, the algorithm determines whether the patient meets the NCCN criteria for genetic testing. The algorithm has been deployed for clinical use and integrated with the Epic EHR at the University of Utah Health and New York University. The details of the algorithm and its deployment in clinical practice are available elsewhere [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. In this study, we used a structured data algorithm as the baseline.</p>
        </sec>
        <sec>
          <title>NLP-Augmented Algorithm</title>
          <p>The NLP-augmented algorithm was built on top of the structured data algorithm by converting the NLP output into a structured FHH format (condition, family member, and age of onset). As a result, the same structured data algorithm consumes NLP-augmented data. To handle the uncertainties, 2 different NLP configurations were provided, including and excluding uncertainty assertions for each of the breast, ovarian, and colorectal cancer cohorts. The configuration that included cases with uncertainty assertions was used to estimate the impact of NLP augmentations on algorithm-identified genetic testing candidates.</p>
        </sec>
        <sec>
          <title>NLP-Augmented Algorithm Evaluation</title>
          <p>The evaluation of the NLP-augmented algorithm consisted of two parts: (1) comparing the performance of the NLP-augmented algorithm with that of the structured data algorithm using manually reviewed data as a reference standard, and (2) estimating NLP’s impact on the patient cohort size generated by the structured data algorithm over the whole data set using the inclusion configuration. A patient-level data set was created in this study. Owing to the large size of the cohort, it was not feasible to validate the expected output for all patient cases. Therefore, we sampled and annotated the algorithm outputs (against the NCCN algorithm evaluation data set) instead of annotating the input data. A review of a subset of 200 cases showed that when the baseline and NLP-augmented algorithms agreed regarding patient eligibility for genetic testing, the algorithm outputs were correct in 100% of the cases. Therefore, for cost-efficiency considerations, we applied stratified sampling to down-sample the cases in which the 2 algorithms agreed to maintain a 1:2 ratio between cases with agreement and disagreement. We sampled 100 cases in total, 50 breast and ovarian cancer screening candidates and 50 colorectal cancer screening candidates. Subsequently, 2 annotators independently reviewed these cases to determine whether the 2 algorithms reached the correct conclusion. Any disagreement between the 2 annotators was adjudicated by a third annotator.</p>
          <p>The structured data and NLP-augmented algorithms were compared in terms of precision, recall, and F1 scores. The 95% CIs were computed using the bootstrap method. As we did not obtain the ground truth of the patients’ FHH by contacting the patients themselves, the reference standards were made solely based on the entries in the FHH section. Next, we estimated the effectiveness of NLP augmentation by comparing (1) the number of FHH entries that were computable for the NCCN criteria and (2) the number of patients who met the criteria with and without NLP.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>This study was approved by the institutional review board at the University of Utah (IRB_00154076).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Set Description</title>
        <p>After splitting the data set, 2398 patients with 12,430 FHH entries were included in the NLP development or evaluation data set and 66,853 patients with 494,880 FHH entries were included in the NCCN algorithm evaluation data set. A total of 8172 patients did not have any FHH entries and were excluded from the data set. These 2 data sets were similar in sex, race, ethnicity, and age (<xref ref-type="table" rid="table3">Table 3</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Patient characteristics in the NLP<sup>a</sup> development or evaluation data set and the NCCN<sup>b</sup> algorithm evaluation data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="260"/>
            <col width="360"/>
            <col width="350"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristic</td>
                <td>NLP development or evaluation data set (n=2398)</td>
                <td>NCCN algorithm evaluation data set (n=66,853)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Gender (male), n (%)</td>
                <td>998 (41.2)</td>
                <td>24,524 (36.7)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Race, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White</td>
                <td>1752 (73.2)</td>
                <td>51,171 (76.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td>359 (15)</td>
                <td>9510 (14.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian</td>
                <td>141 (5.9)</td>
                <td>2973 (4.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black or African American</td>
                <td>67 (2.8)</td>
                <td>1450 (2.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not reported</td>
                <td>56 (2.3)</td>
                <td>1226 (1.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>American Indian or Alaska Native</td>
                <td>17 (0.7)</td>
                <td>523 (0.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic ethnicity</td>
                <td>327 (13.6)</td>
                <td>9147 (13.7)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Age (years), mean (SD)</td>
                <td>40.2 (9.6)</td>
                <td>42.6 (9.9)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>NCCN: National Comprehensive Cancer Network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>NLP Performance Evaluation Results</title>
        <p>Using the snippet-level test data set, we evaluated the NLP’s performance at the snippet level; the average precision was 0.94 with 95% CI 0.91-0.97, the average recall was 0.94 with 95% CI 0.90-0.96, and the average F1 score was 0.94 with 95% CI 0.91-0.96. The performance of the measurements for each relationship type is presented in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The performance on the snippet-level data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="90"/>
            <col width="90"/>
            <col width="90"/>
            <col width="180"/>
            <col width="180"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Relation types</td>
                <td>TP<sup>a</sup></td>
                <td>FP<sup>b</sup></td>
                <td>FN<sup>c</sup></td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F1 score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>FX_CANCER<sup>d</sup></td>
                <td>489</td>
                <td>32</td>
                <td>31</td>
                <td>0.94</td>
                <td>0.94</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>FX_SYNDROME<sup>e</sup></td>
                <td>2</td>
                <td>1</td>
                <td>3</td>
                <td>0.67</td>
                <td>0.40</td>
                <td>0.50</td>
              </tr>
              <tr valign="top">
                <td>FX_GENE_MUT<sup>f</sup></td>
                <td>2</td>
                <td>0</td>
                <td>0</td>
                <td>1.00</td>
                <td>1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>FX_ONSET<sup>g</sup></td>
                <td>203</td>
                <td>10</td>
                <td>14</td>
                <td>0.95</td>
                <td>0.94</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>Microaverage<sup>h</sup></td>
                <td>N/A<sup>i</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td>0.94 (0.91-0.97)</td>
                <td>0.94 (0.90-0.96)</td>
                <td>0.94 (0.91-0.96)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>TP: true positive.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>FP: false positive.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>FN: false negative.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>FX_CANCER: family member to cancer relation.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>FX_SYNDROME: family member to cancer syndrome relation.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>FX_GENE_MUT: family member to cancer-related gene-mutation relation.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>FX_ONSET: family member to age of onset relation.</p>
            </fn>
            <fn id="table4fn8">
              <p><sup>h</sup>These scores were computed using aggregated data, including all 4 relation types. The CIs were computed using the bootstrap method.</p>
            </fn>
            <fn id="table4fn9">
              <p><sup>i</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>NLP Error Analysis</title>
        <p>On the basis of the snippet-level error analysis of the NLP output from the test data set of 1000 FHH entries, we found 6 error types (<xref ref-type="table" rid="table5">Table 5</xref>). Approximately 50% of the errors were not directly caused by NLP mistakes. The <italic>Annotation Error</italic> was made by the annotators, which is common when a large volume of data needs to be reviewed. In addition, as we only partially overlapped the annotations and adjudicated the disagreement between the annotators for greater efficiency, the data that were not overlapped might also have contributed to annotation errors. <italic>Data Input Typos</italic> were another complication, especially some rare typos; for example, <italic>bladdler</italic>. <italic>Out of Vocabulary</italic> signified the words and phrases that were not seen in the training set and not added to the knowledge base from Unified Medical Language System and experts’ suggestions. For instance, <italic>precancer</italic> in the entry of <italic>{{CANCER, BREAST}} precancer, age 30 {{MOTHER}} {{}}</italic> should override the breast cancer code, because <italic>precancer</italic> is a term that describes a lesion that may develop into cancer. The NLP did not recognize the term; therefore, it was not possible to exclude breast cancer as an existing family health history. A <italic>Context Error</italic> might happen when the context of the entities included subtleties that the NLP could not correctly parse, for example, <italic>{{CANCER, COLON}} possible, colon cancer, died when pt was 5 years old {{FATHER}} {{}}</italic>. The NLP did not recognize that <italic>5 years old</italic> in the comments field described the patient’s age rather than the father’s age of onset. Sometimes, the input data were so ambiguous (ambiguous input) that even our annotators were not sure of the exact meaning without referring to other sources. 
For example, the entry <italic>{{CANCER, COLON}} ileum {{FATHER}} {{}}</italic>, likely meant the father had <italic>ileum cancer</italic>, which overwrote <italic>colon cancer</italic>. However, we were not 100% confident if the father actually had both because most of the cases like these would have been coded as <italic>{{CANCER, OTHER}} ileum {{FATHER}} {{}}</italic>. In real practice, genetic counselors would need to go over some clinical notes to find statements that can be cross-referenced or reach out to the patient to confirm the information. These types of improper coding in the structured fields and the conflicting information between the structured fields and comments field indicate that the EHR user interface for FHH entry may benefit from redesign, such as allowing users to label uncertainty. Finally, when designing the schema for annotation, we aimed to capture as much useful information as possible. We included three aggregated types of cancer, <italic>GYNECOLOGIC</italic>, <italic>GASTROINTESTINAL</italic>, and <italic>GENITOURINARY</italic>, to code cancers not specific to the anatomical sites indicated in the guidelines. However, when executing the algorithms, these types are less useful, as they would result in more false-positive cases that are likely not relevant to the requirements. Therefore, these 3 types were excluded from the final NLP solution. When the NLP output was compared with the snippet-level annotations, this <italic>schema mismatch</italic> caused errors. For instance, <italic>colon rectal cancer</italic> was annotated as <italic>GASTROINTESTINAL</italic> to capture both, but in the NLP implementation, only one RECTAL cancer was counted instead of two cancers to simplify the implementation. This mismatch did not affect the patient-level results but was counted as a snippet-level error.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Type of snippet-level errors and counts.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="190"/>
            <col width="190"/>
            <col width="370"/>
            <thead>
              <tr valign="top">
                <td>Type of errors</td>
                <td>False positive, n</td>
                <td>False negative, n</td>
                <td>Examples</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Annotation error</td>
                <td>10</td>
                <td>13</td>
                <td>A missed annotation</td>
              </tr>
              <tr valign="top">
                <td>Data input typo<sup>a</sup></td>
                <td>1</td>
                <td>5</td>
                <td>bladdler ca<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Out of vocabulary<sup>a</sup></td>
                <td>2</td>
                <td>6</td>
                <td>Precancer</td>
              </tr>
              <tr valign="top">
                <td>Context error<sup>a</sup></td>
                <td>22</td>
                <td>11</td>
                <td>Possible, colon cancer, died when pt was <italic>5 years old</italic> {{FATHER}}</td>
              </tr>
              <tr valign="top">
                <td>Ambiguous input</td>
                <td>2</td>
                <td>3</td>
                <td>{{CANCER, COLON}} ileum {{FATHER}}</td>
              </tr>
              <tr valign="top">
                <td>Schema mismatch<sup>c</sup></td>
                <td>6</td>
                <td>10</td>
                <td>See above</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>43</td>
                <td>48</td>
                <td>N/A<sup>d</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>These 3 types of errors are natural language processing (NLP)–caused errors or can be fixed by improving the NLP.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>ca: cancer.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>This type of error does not need to be fixed.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>NLP-Augmented Algorithm Evaluation Results</title>
        <p>The first part of this evaluation compared the NLP-augmented algorithm (using the inclusion configuration) with the structured data algorithm over a stratified sample of 100 patients (50 breast cancer and 50 colorectal cancer, with a 1:2 ratio of cases with agreement versus disagreement between unstructured and structured data). The NLP-augmented algorithm performed better than the structured data algorithm in precision (0.99, 95% CI 0.96-1.00 vs 0.81, 95% CI 0.65-0.95), recall (0.95, 95% CI 0.90-0.99 vs 0.29, 95% CI 0.19-0.40), and F1 scores (0.97, 95% CI 0.94-0.99 vs 0.43, 95% CI 0.31-0.54).</p>
        <p>In the second part of this evaluation, using the whole data set, compared with the original structured FHH entries, NLP augmentation yielded 21,703 (33.6%) additional computable FHH entries, with 8692 (27.9%) entries added owing to the extraction of conditions, 2689 (69.3%) owing to age of onset, and 10,322 (34.9%) owing to family members. With these additional entries extracted by NLP, 1578 (51%) patients met the NCCN criteria for breast cancer genetic testing, 373 (94%) patients met the criteria for colorectal cancer genetic testing, and 1841 (53.8%) additional unique patients met either or both criteria.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study developed and evaluated an NLP-augmented algorithm to identify patients who met evidence-based criteria for genetic testing of hereditary colorectal and breast cancer. Overall, the proposed automated algorithm offers a promising approach to identifying these patients as an alternative to current clinical workflows, which rely on extensive manual review of patient records. We also demonstrated that compared with structured data alone, an NLP algorithm that focused on the interplay between structured data and associated free-text comments significantly increased the computability of FHH entries and algorithm accuracy. Compared with structured data alone, NLP augmentation led to a 53.8% increase in the number of patients available to compute against the NCCN criteria for genetic testing.</p>
        <p>Chen et al recognized the significance of data recorded in the FHH section of an EHR [<xref ref-type="bibr" rid="ref12">12</xref>]. They characterized the use and contents of the FHH comments field and found that it was used to augment or modify the attributes of the statement (eg, uncertainty and negation) for all 3 types of entities: <italic>family member</italic>, <italic>condition</italic>, and <italic>age of onset</italic>. However, they did not develop a complete solution for extracting these relationships. In a previous study, we used NLP to extract the disease age of onset from the comments field [<xref ref-type="bibr" rid="ref10">10</xref>]. In this study, we extended the NLP solution to extract all 3 types of entities and the relations between them. In addition, the algorithm reconciles information from structured and unstructured data to identify patients who meet the NCCN criteria for genetic testing of 2 common hereditary cancers. The study results demonstrated that the NLP-augmented algorithm accurately extracted relevant FHH at the snippet level that combined the structured and comments fields. At the patient level, the algorithm significantly improved the recall and precision of identifying patients who met the NCCN criteria for genetic testing of hereditary breast and colorectal cancer.</p>
        <p>Compared with previously published studies on FHH extraction using NLP, this study differs significantly in the input data source, types of technical challenges, and ultimate goals. Previous studies have focused primarily on extracting FHH from clinical notes, whereas our approach targets the FHH section of the EHR by combining structured and unstructured data. Complete sentences are typical in the FHH narrative of clinical notes, while single words, phrases, and short sentences are more typical in the FHH comment fields. Consequently, the technical challenges are different. Challenges in extracting FHH from clinical notes include FHH section detection, entity recognition, and relation detection [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. In contrast, targeted extraction from the FHH section of the EHR requires reconciliation between structured and unstructured data, as they can be complementary, redundant, or conflicting [<xref ref-type="bibr" rid="ref12">12</xref>]. In addition, extraction from clinical notes focuses on general FHH extraction, whereas our approach aims to identify patients with a specific clinical purpose. Thus, the NLP performance reported in <xref ref-type="table" rid="table4">Table 4</xref> is not directly comparable with that reported in previous studies.</p>
        <p>As noted above, the NLP-augmented algorithm can be configured to include or exclude FHH entries with uncertain statements in the free-text comments. The choice of configuration depends on the requirements of specific use cases and available institutional resources. For instance, in a study that aimed to reach out to eligible patients offering genetic testing, a higher priority may have been given to patients who met testing criteria with a higher degree of certainty (ie, excluding uncertain statements) to minimize manual screening efforts. In contrast, if genetic testing outreach is rolled out as usual care, an institution may want to maximize the benefits of genetic testing to as many patients as possible by including uncertain statements. The difference in algorithm performance between the 2 configurations (ie, including vs excluding uncertainty statements) was not significant. Thus, we did not report the results using the exclusion configuration.</p>
        <p>The results showed that the NLP-augmented algorithm had significantly higher precision and recall than structured data alone in identifying patients who met the NCCN criteria for genetic testing. This increase was achieved because the comments field provided additional information that can be used to compute the NCCN criteria, including the cancer type (eg, <italic>pancreatic cancer</italic>), the age of onset (eg, <italic>diagnosed colon cancer, at age 40</italic>), and the affected family member (eg, <italic>paternal aunt</italic>). In addition, information in the comments field can correct inaccurate data in structured fields.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study had several limitations. First, we used data from one EHR at an academic medical center. Therefore, we cannot conclude that the algorithm and study findings are generalizable to other EHRs and health care systems. However, the EHR used in this study is one of the most widely used EHRs in the United States, and other EHR products use similar FHH sections to collect FHH data [<xref ref-type="bibr" rid="ref12">12</xref>], suggesting that the proposed approach may be adapted to those settings. Second, error analysis demonstrated that certain FHH entries could not be disambiguated based on the available data provided in the FHH section. Future studies could investigate approaches to disambiguate these FHH entries, such as applying NLP to clinical notes or asking patients to confirm through the patient portal.</p>
        <p>As the patient-level data set down-sampled the cases in which the 2 algorithms agreed, the difference between the NLP-augmented algorithm and the structured data algorithm was amplified correspondingly. Thus, we did not analyze the statistical differences between the algorithms on this data set. Despite this, the results showed that when these 2 algorithms disagreed with each other, the NLP-augmented algorithm likely produced the correct answers. In addition, because of the down-sampling, more challenging cases were likely included in the reference data set compared with the original data set. Thus, the actual performance of both algorithms is potentially higher than the scores reported in the <italic>NLP-Augmented Algorithm Evaluation Results</italic> section.</p>
        <p>Although the NLP-augmented algorithm still missed eligible patients, it achieved higher recall than the structured algorithm. Future studies could investigate combining FHH extraction from both FHH sections and clinical notes to further reduce false-negative errors. In addition, other solutions beyond NLP are needed to improve the accuracy and comprehensiveness of the FHH collection in the EHR.</p>
        <p>Finally, we investigated only a rule-based solution for the NLP task. Given that the performance was satisfactory and the rule-based approach could be customized quickly for error fixing and future enhancements, we decided that it was not worthwhile to investigate more complex machine learning–based solutions.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study demonstrated that our NLP solution can accurately extract FHH from both the structured and unstructured fields of the FHH section. Applying this NLP solution to augment the structured data algorithm could improve the precision and recall of identifying patients who meet the NCCN criteria for genetic testing of hereditary breast and colorectal cancer.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CDS</term>
          <def>
            <p>clinical decision support</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EasyCIE</term>
          <def>
            <p>easy clinical information extractor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FHH</term>
          <def>
            <p>family health history</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NCCN</term>
          <def>
            <p>National Comprehensive Cancer Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SQL</term>
          <def>
            <p>Structured Query Language</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by grants U24CA204800 and U01CA232826 from the National Cancer Institute of the United States National Institutes of Health and grant T15LM007124 from the National Library of Medicine.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Onega</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Beaber</surname>
              <given-names>EF</given-names>
            </name>
            <name name-style="western">
              <surname>Sprague</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Barlow</surname>
              <given-names>WE</given-names>
            </name>
            <name name-style="western">
              <surname>Haas</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Tosteson</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Schnall</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schapira</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Geller</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Weaver</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Conant</surname>
              <given-names>EF</given-names>
            </name>
          </person-group>
          <article-title>Breast cancer screening in an era of personalized regimens: a conceptual model and National Cancer Institute initiative for risk-based and preference-based approaches at a population level</article-title>
          <source>Cancer</source>
          <year>2014</year>
          <month>10</month>
          <day>01</day>
          <volume>120</volume>
          <issue>19</issue>
          <fpage>2955</fpage>
          <lpage>64</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/cncr.28771"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/cncr.28771</pub-id>
          <pub-id pub-id-type="medline">24830599</pub-id>
          <pub-id pub-id-type="pmcid">PMC4342235</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kahi</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Imperiale</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Juliar</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Rex</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Effect of screening colonoscopy on colorectal cancer incidence and mortality</article-title>
          <source>Clin Gastroenterol Hepatol</source>
          <year>2009</year>
          <month>07</month>
          <volume>7</volume>
          <issue>7</issue>
          <fpage>770</fpage>
          <lpage>5; quiz 711</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cgh.2008.12.030</pub-id>
          <pub-id pub-id-type="medline">19268269</pub-id>
          <pub-id pub-id-type="pii">S1542-3565(09)00006-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>GD</given-names>
            </name>
          </person-group>
          <article-title>Management of women at high risk of breast cancer</article-title>
          <source>BMJ</source>
          <year>2014</year>
          <month>04</month>
          <day>28</day>
          <volume>348</volume>
          <issue>apr28 26</issue>
          <fpage>g2756</fpage>
          <pub-id pub-id-type="doi">10.1136/bmj.g2756</pub-id>
          <pub-id pub-id-type="medline">24778341</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walter</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Schonberg</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Screening mammography in older women: a review</article-title>
          <source>JAMA</source>
          <year>2014</year>
          <month>04</month>
          <day>02</day>
          <volume>311</volume>
          <issue>13</issue>
          <fpage>1336</fpage>
          <lpage>47</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24691609"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2014.2834</pub-id>
          <pub-id pub-id-type="medline">24691609</pub-id>
          <pub-id pub-id-type="pii">1853134</pub-id>
          <pub-id pub-id-type="pmcid">PMC4391705</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>HD</given-names>
            </name>
            <name name-style="western">
              <surname>Pappas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zakher</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Okinaka-Hu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Risk assessment, genetic counseling, and genetic testing for BRCA-related cancer in women: a systematic review to update the U.S. Preventive Services Task Force recommendation</article-title>
          <source>Ann Intern Med</source>
          <year>2014</year>
          <month>02</month>
          <day>18</day>
          <volume>160</volume>
          <issue>4</issue>
          <fpage>255</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.7326/m13-1684</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Daly</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Pilarski</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yurgelun</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Buys</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Dickson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Domchek</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Elkhanany</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Garber</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Goggins</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hutton</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kohlmann</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kurian</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Laronga</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Litton</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Mak</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Menendez</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Merajver</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Norquist</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Offit</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pal</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Pederson</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Reiser</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shannon</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Visvanathan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Weitzel</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Wick</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wisinski</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Dwyer</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Darlow</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>NCCN guidelines insights: genetic/familial high-risk assessment: breast, ovarian, and pancreatic, version 1.2020</article-title>
          <source>J Natl Compr Canc Netw</source>
          <year>2020</year>
          <month>04</month>
          <volume>18</volume>
          <issue>4</issue>
          <fpage>380</fpage>
          <lpage>91</lpage>
          <comment>[Referenced with permission from the National Comprehensive Cancer Network, Inc. 2020]</comment>
          <pub-id pub-id-type="doi">10.6004/jnccn.2020.0017</pub-id>
          <pub-id pub-id-type="medline">32259785</pub-id>
          <pub-id pub-id-type="pii">jnccnGLINS1804</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Provenzale</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Llor</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Halverson</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Grady</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Haraldsdottir</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Markowitz</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Slavin</surname>
              <given-names>TP</given-names>
            </name>
            <name name-style="western">
              <surname>Hampel</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ness</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Ahnen</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Early</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Giardiello</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Kanth</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Klapman</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Lazenby</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lynch</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mikkelson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Peter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Regenbogen</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Dwyer</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Ogba</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>NCCN guidelines insights: genetic/familial high-risk assessment: colorectal, version 2.2019</article-title>
          <source>J Natl Compr Canc Netw</source>
          <year>2019</year>
          <month>09</month>
          <day>01</day>
          <volume>17</volume>
          <issue>9</issue>
          <fpage>1032</fpage>
          <lpage>41</lpage>
          <comment>[Referenced with permission from the National Comprehensive Cancer Network, Inc. 2020]</comment>
          <pub-id pub-id-type="doi">10.6004/jnccn.2019.0044</pub-id>
          <pub-id pub-id-type="medline">31487681</pub-id>
          <pub-id pub-id-type="pii">jnccnGLsINScolongenetics1709</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Del Fiol</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kohlmann</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bradshaw</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Flynn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hess</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schiffman</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Nanjo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kawamoto</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Standards-based clinical decision support platform to manage patients who meet guideline-based criteria for genetic evaluation of familial cancer</article-title>
          <source>JCO Clin Cancer Inform</source>
          <year>2020</year>
          <month>11</month>
          <volume>4</volume>
          <fpage>1</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1200/cci.19.00120</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bill</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pakhomov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Winden</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
          </person-group>
          <article-title>Automated extraction of family history information from clinical notes</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2014</year>
          <volume>2014</volume>
          <fpage>1709</fpage>
          <lpage>17</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25954443"/>
          </comment>
          <pub-id pub-id-type="medline">25954443</pub-id>
          <pub-id pub-id-type="pmcid">PMC4419952</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mowery</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Kawamoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bradshaw</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kohlmann</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Schiffman</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Borbolla</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Del Fiol</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Determining onset for familial breast and colorectal cancer from family history comments in the electronic health record</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2019</year>
          <volume>2019</volume>
          <fpage>173</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31258969"/>
          </comment>
          <pub-id pub-id-type="medline">31258969</pub-id>
          <pub-id pub-id-type="pmcid">PMC6568127</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ihrke</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Exploring gaps of family history documentation in EHR for precision medicine - a case study of familial hypercholesterolemia ascertainment</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2016</year>
          <volume>2016</volume>
          <fpage>160</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27570664"/>
          </comment>
          <pub-id pub-id-type="medline">27570664</pub-id>
          <pub-id pub-id-type="pmcid">PMC5001769</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Burdick</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenau</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Characterizing the use and contents of free-text family history comments in the electronic health record</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2012</year>
          <volume>2012</volume>
          <fpage>85</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23304276"/>
          </comment>
          <pub-id pub-id-type="medline">23304276</pub-id>
          <pub-id pub-id-type="pmcid">PMC3540518</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taber</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ghani</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Schiffman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kohlmann</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hess</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chidambaram</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kawamoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Waller</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Borbolla</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Del Fiol</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Physicians' strategies for using family history data: having the data is not the same as using the data</article-title>
          <source>JAMIA Open</source>
          <year>2020</year>
          <month>10</month>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>378</fpage>
          <lpage>85</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/34632321"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooaa035</pub-id>
          <pub-id pub-id-type="medline">34632321</pub-id>
          <pub-id pub-id-type="pii">ooaa035</pub-id>
          <pub-id pub-id-type="pmcid">PMC7660959</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Family history extraction from synthetic clinical narratives using natural language processing: overview and evaluation of a challenge data set and solutions for the 2019 National NLP Clinical Challenges (n2c2)/Open Health Natural Language Processing (OHNLP) competition</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>01</month>
          <day>27</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e24008</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/1/e24008/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24008</pub-id>
          <pub-id pub-id-type="medline">33502329</pub-id>
          <pub-id pub-id-type="pii">v9i1e24008</pub-id>
          <pub-id pub-id-type="pmcid">PMC7875692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Extracting family history of patients from clinical narratives: exploring an end-to-end solution with deep learning models</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>12</month>
          <day>15</day>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e22982</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/12/e22982/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22982</pub-id>
          <pub-id pub-id-type="medline">33320104</pub-id>
          <pub-id pub-id-type="pii">v8i12e22982</pub-id>
          <pub-id pub-id-type="pmcid">PMC7772072</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stenetorp</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pyysalo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Topić</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ohta</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>brat: a web-based tool for NLP-assisted text annotation</article-title>
          <source>Proceedings of the Demonstrations at the 13th Conference of the European Chapter of the Association for Computational Linguistics</source>
          <year>2012</year>
          <conf-name>Demonstrations at the 13th Conference of the European Chapter of the Association for Computational Linguistics</conf-name>
          <conf-date>Apr 23-27, 2012</conf-date>
          <conf-loc>Avignon, France</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/E12-2021"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <article-title>Cancer family history annotation schema</article-title>
          <source>GitHub</source>
          <access-date>2022-03-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/jianlins/fmx_schema">https://github.com/jianlins/fmx_schema</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mowery</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sanders</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gawron</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Extracting intrauterine device usage from clinical texts using natural language processing</article-title>
          <source>Proceedings of the 2017 IEEE International Conference on Healthcare Informatics (ICHI)</source>
          <year>2017</year>
          <conf-name>2017 IEEE International Conference on Healthcare Informatics (ICHI)</conf-name>
          <conf-date>Aug 23-26, 2017</conf-date>
          <conf-loc>Park City, UT, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ichi.2017.21</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng-Treitler</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Identification and extraction of family history information from clinical reports</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2008</year>
          <month>11</month>
          <day>06</day>
          <volume>2008</volume>
          <fpage>247</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18999129"/>
          </comment>
          <pub-id pub-id-type="medline">18999129</pub-id>
          <pub-id pub-id-type="pmcid">PMC2656021</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hurdle</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>Trie-based rule processing for clinical NLP: a use-case study of n-trie, making the ConText algorithm more efficient and scalable</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>09</month>
          <volume>85</volume>
          <fpage>106</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30157-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.08.002</pub-id>
          <pub-id pub-id-type="medline">30092358</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30157-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6171746</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <article-title>EasyCIE_Hub</article-title>
          <source>GitHub</source>
          <access-date>2022-06-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/jianlins/EasyCIE_Hub">https://github.com/jianlins/EasyCIE_Hub</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Austin</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>JV</given-names>
            </name>
          </person-group>
          <article-title>Bootstrap methods for developing predictive models</article-title>
          <source>Am Stat</source>
          <year>2004</year>
          <month>05</month>
          <volume>58</volume>
          <issue>2</issue>
          <fpage>131</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1198/0003130043277</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kawamoto</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>OpenCDS: an open-source, standards-based, service-oriented framework for scalable CDS</article-title>
          <source>Proceedings of the SOA in Healthcare 2011 Conference</source>
          <year>2011</year>
          <conf-name>SOA in Healthcare 2011 Conference</conf-name>
          <conf-date>Jul 13-15, 2011</conf-date>
          <conf-loc>Hyatt Dulles, Herndon, VA, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1016/j.csi.2020.103468</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaphingst</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Kohlmann</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chambers</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Bradshaw</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Chavez-Yenter</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Colonna</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Espinel</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Everett</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Gammon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hagerty</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hess</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kehoe</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kessler</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kimball</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Loomis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Monahan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schiffman</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Temares</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tobik</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wetter</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Kawamoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Del Fiol</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Buys</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Ginsburg</surname>
              <given-names>O</given-names>
            </name>
            <collab>BRIDGE research team</collab>
          </person-group>
          <article-title>Comparing models of delivery for cancer genetics services among patients receiving primary care who meet criteria for genetic evaluation in two healthcare systems: BRIDGE randomized controlled trial</article-title>
          <source>BMC Health Serv Res</source>
          <year>2021</year>
          <month>06</month>
          <day>02</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>542</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-021-06489-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12913-021-06489-y</pub-id>
          <pub-id pub-id-type="medline">34078380</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12913-021-06489-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC8170651</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
