<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i4e35257</article-id>
      <article-id pub-id-type="pmid">35436226</article-id>
      <article-id pub-id-type="doi">10.2196/35257</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Natural Language Processing for Assessing Quality Indicators in Free-Text Colonoscopy and Pathology Reports: Development and Usability Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Elbattah</surname>
            <given-names>Mahmoud</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Bae</surname>
            <given-names>Jung Ho</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7669-1213</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Han</surname>
            <given-names>Hyun Wook</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>CHA University School of Medicine</institution>
            <institution>CHA University</institution>
            <addr-line>335, Pangyo-ro, Bundang-gu</addr-line>
            <addr-line>Seongnam, 13488</addr-line>
            <country>Republic of Korea</country>
            <fax>82 31 881 7069</fax>
            <phone>82 31 881 7109</phone>
            <email>stepano7@gmail.com</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6918-5694</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Sun Young</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4766-3752</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Gyuseon</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2816-9177</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Sa</surname>
            <given-names>Soonok</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5736-8813</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Chung</surname>
            <given-names>Goh Eun</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8344-7737</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Seo</surname>
            <given-names>Ji Yeon</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3692-8807</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Jin</surname>
            <given-names>Eun Hyo</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2126-3315</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Heecheon</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2683-2484</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>An</surname>
            <given-names>DongUk</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2491-2288</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>CHA University School of Medicine</institution>
        <institution>CHA University</institution>
        <addr-line>Seongnam</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Institute for Biomedical Informatics</institution>
        <institution>CHA University School of Medicine</institution>
        <institution>CHA University</institution>
        <addr-line>Seongnam</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Internal Medicine and Healthcare Research Institute</institution>
        <institution>Healthcare System Gangnam Center</institution>
        <institution>Seoul National University Hospital</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Miso Info Tech Co, Ltd</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hyun Wook Han <email>stepano7@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>4</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>4</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>4</issue>
      <elocation-id>e35257</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>11</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>20</day>
          <month>12</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>13</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>25</day>
          <month>2</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Jung Ho Bae, Hyun Wook Han, Sun Young Yang, Gyuseon Song, Soonok Sa, Goh Eun Chung, Ji Yeon Seo, Eun Hyo Jin, Heecheon Kim, DongUk An. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 15.04.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/4/e35257" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Manual data extraction of colonoscopy quality indicators is time and labor intensive. Natural language processing (NLP), a computer-based linguistics technique, can automate the extraction of important clinical information, such as adverse events, from unstructured free-text reports. NLP information extraction can facilitate the optimization of clinical work by helping to improve quality control and patient management.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We developed an NLP pipeline to analyze free-text colonoscopy and pathology reports and evaluated its ability to automatically assess adenoma detection rate (ADR), sessile serrated lesion detection rate (SDR), and postcolonoscopy surveillance intervals.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The NLP tool for extracting colonoscopy quality indicators was developed using a data set of 2000 screening colonoscopy reports from a single health care system, with an associated 1425 pathology reports. The NLP system was then tested on a data set of 1000 colonoscopy reports and its performance was compared with that of 5 human annotators. Additionally, data from 54,562 colonoscopies performed between 2010 and 2019 were analyzed using the NLP pipeline.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The NLP pipeline achieved an overall accuracy of 0.99-1.00 for identifying polyp subtypes, 0.99-1.00 for identifying the anatomical location of polyps, and 0.98 for counting the number of neoplastic polyps. The NLP pipeline achieved performance similar to clinical experts for assessing ADR, SDR, and surveillance intervals. NLP analysis of a 10-year colonoscopy data set identified great individual variance in colonoscopy quality indicators among 25 endoscopists.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The NLP pipeline could accurately extract information from colonoscopy and pathology reports and demonstrated clinical efficacy for assessing ADR, SDR, and surveillance intervals in these reports. Implementation of the system enabled automated analysis and feedback on quality indicators, which could motivate endoscopists to improve the quality of their performance and improve clinical decision-making in colorectal cancer screening programs.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>colonoscopy</kwd>
        <kwd>adenoma</kwd>
        <kwd>endoscopy</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>High-quality colonoscopy is a proven method of reducing colorectal cancer risk by allowing early detection and removal of premalignant polyps [<xref ref-type="bibr" rid="ref1">1</xref>]. However, there are considerable variations in the quality of colonoscopies performed by endoscopists [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. Therefore, quality assurance is an essential part of colonoscopy screening programs, and the American Society of Gastrointestinal Endoscopy/American College of Gastroenterology Task Force on Quality in Endoscopy has published indicators for colonoscopy to improve safety and quality [<xref ref-type="bibr" rid="ref5">5</xref>]. While all the indicators are important, the adenoma detection rate (ADR) and sessile serrated lesion (SSL) detection rate (SDR) of endoscopists are well-established key indicators of postcolonoscopy colorectal cancer incidence and related deaths [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. Another crucial quality indicator is the adherence to guidelines for setting the frequency of follow-up colonoscopies, known as the surveillance interval. Recommending an incorrect surveillance interval may increase the incidence of metachronous lesions or lead to the overuse of colonoscopies [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>Periodically reporting to endoscopists their performance on quality measures effectively improves the quality of colonoscopies by encouraging introspection and motivation for behavior changes [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. However, reporting ADR, SDR, and surveillance intervals requires a careful manual review of colonoscopy reports and their associated pathology reports, followed by a calculation of polyp data based on clinical guidelines. This series of processes for quality reporting is laborious and time-consuming.</p>
      <p>Natural language processing (NLP) is a computer-based linguistics technique used to extract information from free-text data documents [<xref ref-type="bibr" rid="ref12">12</xref>]. NLP allows the automation of report creation by extracting important clinical information from unstructured free-text documents. NLP has been used in various clinical fields [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. The application of NLP to information extraction requires identifying clinical information, such as adverse events, and facilitates various aspects of optimizing clinical work, such as quality control and patient management [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>Here, we developed an NLP pipeline for the automated assessment of quality indicators, such as ADR, SDR, and surveillance intervals, from multi-language colonoscopy and pathology report forms. The pipeline was evaluated in a validation set and compared with expert manual reviews to determine whether the pipeline could reliably assist the inefficient manual process. The NLP system was also applied to a 10-year set of colonoscopy and pathology reports to investigate its ability to process real-world data on colonoscopy quality indicators from individual endoscopists.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design and Population</title>
        <p>Colonoscopy for colon cancer screening was performed at Seoul National University Hospital Gangnam Center, where comprehensive medical checkups of approximately 30,000 patients are conducted annually. A total of 121,059 screening and surveillance colonoscopies with 63,697 associated pathology reports from 36,119 patients examined between 2003 and 2019 were derived from SUPREME (Seoul National University Hospital Patients Research Environment), the clinical data warehouse of Seoul National University Hospital. A representative sample of 3000 colonoscopy reports, paired with 2168 pathology reports, from 3000 patients examined after 2003 was randomly selected and used as the development data set for the NLP pipeline (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The reports were divided into a training data set of 2000 colonoscopy reports for NLP rule formulation and a testing data set of 1000 colonoscopy reports for validation. Five human annotators (4 board-certified gastroenterologists and 1 researcher) manually reviewed all procedure data, and the consensus of the 5 annotators served as the reference standard for the data set.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Data set description and process for the NLP pipeline development and information extraction. NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="medinform_v10i4e35257_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>NLP Pipeline Development</title>
        <p>We used regular expressions in Python (3.7.10, Python Software Foundation) and smartTA (1.0b, MISO Info Tech) to develop the NLP pipeline. A regular expression is a sequence of characters specialized for complex text processing using metacharacters [<xref ref-type="bibr" rid="ref19">19</xref>]. smartTA is NLP software that helps analyze linguistic patterns and construct lexicons. The NLP pipeline was developed with the following steps: First, we developed multi-language report forms (in Korean only, in English only, and a mixed report form) for NLP pipeline processing by creating a Korean-English lexicon for medical terms, synonyms, and endoscopic abbreviations using a training data set and a colonoscopy textbook [<xref ref-type="bibr" rid="ref20">20</xref>]. Second, we determined removable terms and phrases in the reports through an interactive discussion with gastroenterologists. Third, we defined the extraction rules using smartTA. Fourth, we updated the rules after the extracted results were evaluated by gastroenterologists. These development steps were repeated until it was no longer possible to obtain performance increases by updating the extraction rules. The final version was validated using the 1000-report testing data set.</p>
        <p>The NLP pipeline developed for this study consisted of text preprocessing, information extraction, and summarization (<xref rid="figure1" ref-type="fig">Figure 1</xref>, <xref rid="figure2" ref-type="fig">Figure 2</xref>). In text preprocessing, the colonoscopy and associated pathology reports were combined as follows: each sentence including a biopsy-related phrase (ie, an abbreviation, number, or character) in the findings section of the colonoscopy report was linked with polyp histopathology results in the diagnosis section of the pathology report according to the sequence of specimens in the pathology report. In information extraction, the pipeline consulted the lexicon to extract the target information, including the presence, type, location, and size of polyps, from the combined colonoscopy-pathology text.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Extraction and summarization process of the NLP pipeline. NLP: natural language processing; Y/N: yes/no (indicating presence or absence); Rt: right colon; Lt: left colon.</p>
          </caption>
          <graphic xlink:href="medinform_v10i4e35257_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Finally, the extracted information on the biopsied polyps was summarized in the final summary format and used to calculate the detection rate and surveillance interval.</p>
      </sec>
      <sec>
        <title>Target Variables for Polyp Detection and Surveillance Interval Measurement</title>
        <p>The NLP tool extracted specific information on colon polyps, such as pathological type, anatomical location, and size. The type of colon polyp was extracted from the pathology reports and categorized as adenoma, serrated polyp, or carcinoma. Additionally, the NLP tool extracted the subcategory for adenomas (ie, tubular, tubulovillous, villous, or adenoma with high-grade dysplasia) and serrated polyps (ie, hyperplastic polyp, SSL, or traditional serrated adenoma). Information on the anatomical location of polyps was extracted from the findings section of the colonoscopy reports and defined as follows: left-colon polyps were defined as those located between the rectum and the splenic flexure (ie, the rectum, rectosigmoid, sigmoid, descending colon, and splenic flexure); right-colon polyps were defined as those located between the transverse colon and the cecum (ie, the transverse colon, hepatic flexure, ascending colon, cecum, and ileocecal valve). When location measurements were provided as the distance from the anal verge in cm, a distance of ≥60 cm was considered to be in the right colon.</p>
        <p>The detection rate was calculated as the proportion of colonoscopies that detected at least 1 adenoma or SSL; the overall detection rate and the per-physician detection rate were calculated. The detection rate for advanced adenoma was defined as the proportion of screening colonoscopies that detected a polyp with a size ≥1 cm or an adenomatous pathology with high-grade dysplasia or villous features. The detection rate for advanced SSL was defined as the proportion of screening colonoscopies that detected a polyp with a size ≥1 cm or a pathology with low- or high-grade dysplasia. Surveillance intervals were chosen based on the 2020 US Multi-Society Task Force guidelines, which recommend that a patient with neoplastic polyps undergo surveillance colonoscopies at 1 of 6 defined intervals [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
      </sec>
      <sec>
        <title>Statistical Analysis and Performance Evaluation</title>
        <p>Continuous variables were presented as the mean (SD). Discrete data were tabulated as numbers and percentages. The chi-square test was used to compare proportions, and a 2-tailed <italic>t</italic> test was used to compare quantitative variables. Information extraction performance was evaluated by recall, precision, accuracy, and the F1 score. The F1 score is the harmonic mean of precision and recall. Python (3.7.10) and the SciPy package (1.6.2) were used for statistical calculations [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      </sec>
      <sec>
        <title>Analysis of a 10-Year Set of Colonoscopy Reports for ADR, SDR, and Surveillance Interval</title>
        <p>The NLP pipeline analyzed 54,562 screening and surveillance colonoscopy reports and 34,943 associated pathology reports from 12,264 patients aged ≥50 years at Seoul National University Hospital Gangnam Center; all patients were examined between January 2010 and December 2019. The ADR, SDR, and surveillance intervals were investigated, both overall and individually for endoscopists who performed &#62;500 procedures. The relationship between the polyp detection rate and surveillance interval was also determined.</p>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>This study was approved by the Institutional Review Board of Seoul National University Hospital (1909-093-670).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>NLP Information Extraction Performance</title>
        <p><xref ref-type="table" rid="table1">Table 1</xref> shows the demographics of the 2000-report training data set and the 1000-report testing data set for the NLP pipeline. The NLP tool extracted variables to calculate the quality indicators. <xref ref-type="table" rid="table2">Table 2</xref> shows the extracted key information on pathological type, including advanced features, location, and the number of polyps, which was assessed for recall, precision, accuracy, and the F1 score in the testing data set. The performance of the NLP pipeline ranged from 0.97 to 1.00 in all performance metrics for the presence of adenomas and SSLs with advanced features. For the location of colon polyps, the NLP pipeline demonstrated excellent performance for adenomas, ranging from 0.97 to 1.00; however, the NLP pipeline demonstrated a relatively lower performance for detecting SSL location. The NLP pipeline also demonstrated high performance (&#62;0.98) for counting the number of adenomas and SSLs.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Characteristics of training and testing data sets for the development of the natural language processing pipeline.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="290"/>
            <col width="270"/>
            <col width="270"/>
            <col width="0"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristics</td>
                <td>Training (N=2000)</td>
                <td>Testing (N=1000)</td>
                <td colspan="2"><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Age, mean (SD)</td>
                <td>58.6 (6.4)</td>
                <td>60.4 (6.5)</td>
                <td colspan="2">&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Sex</bold>
                </td>
                <td>.86</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male, n (%)</td>
                <td>1188 (59.4)</td>
                <td>590 (59.0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female, n (%)</td>
                <td>812 (40.6)</td>
                <td>410 (41.0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Adenoma</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overall, n (%)</td>
                <td>925 (46.2)</td>
                <td>475 (47.5)</td>
                <td colspan="2">.72</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only, n (%)</td>
                <td>501 (25.0)</td>
                <td>265 (26.5)</td>
                <td colspan="2">.54</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only, n (%)</td>
                <td>212 (10.6)</td>
                <td>113 (11.3)</td>
                <td colspan="2">.65</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both, n (%)</td>
                <td>212 (10.6)</td>
                <td>97 (9.7)</td>
                <td colspan="2">.53</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Advanced adenoma<sup>a</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overall, n (%)</td>
                <td>77 (3.8)</td>
                <td>34 (3.4)</td>
                <td colspan="2">.62</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only, n (%)</td>
                <td>51 (2.6)</td>
                <td>14 (1.4)</td>
                <td colspan="2">.06</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only, n (%)</td>
                <td>24 (1.2)</td>
                <td>18 (1.8)</td>
                <td colspan="2">.26</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both, n (%)</td>
                <td>3 (0.2)</td>
                <td>2 (0.2)</td>
                <td colspan="2">.87</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Sessile serrated lesion</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overall, n (%)</td>
                <td>121 (6)</td>
                <td>66 (6.6)</td>
                <td colspan="2">.64</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only, n (%)</td>
                <td>79 (4)</td>
                <td>45 (4.5)</td>
                <td colspan="2">.56</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only, n (%)</td>
                <td>34 (1.7)</td>
                <td>15 (1.5)</td>
                <td colspan="2">.80</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both, n (%)</td>
                <td>8 (0.4)</td>
                <td>6 (0.6)</td>
                <td colspan="2">.64</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Advanced sessile serrated lesion<sup>b</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overall, n (%)</td>
                <td>19 (1)</td>
                <td>12 (1.2)</td>
                <td colspan="2">.66</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only, n (%)</td>
                <td>14 (0.7)</td>
                <td>10 (1)</td>
                <td colspan="2">.52</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only, n (%)</td>
                <td>4 (0.2)</td>
                <td>1 (0.1)</td>
                <td colspan="2">.88</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both, n (%)</td>
                <td>1 (0.1)</td>
                <td>1 (0.1)</td>
                <td colspan="2">.80</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Cancer</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overall, n (%)</td>
                <td>3 (0.2)</td>
                <td>0 (0)</td>
                <td colspan="2">.54</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only, n (%)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only, n (%)</td>
                <td>3 (0.2)</td>
                <td>0 (0)</td>
                <td colspan="2">.54</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both, n (%)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Advanced adenomas were defined as adenomas ≥1 cm in size or with pathological features such as high-grade dysplasia or villous features.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Advanced sessile serrated lesions were defined as lesions ≥1 cm in size or with pathological features such as low or high-grade dysplasia.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Performance of the natural language processing pipeline in the testing data set (N=1000).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="410"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="140"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Indicators</td>
                <td colspan="2">Recall</td>
                <td colspan="2">Precision</td>
                <td>Accuracy</td>
                <td>F1 score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Presence of a conventional adenoma</td>
                <td colspan="2">0.99</td>
                <td colspan="2">1.00</td>
                <td>0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Location of conventional adenoma</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>None</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.98</td>
                <td colspan="2">0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only</td>
                <td colspan="2">0.98</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only</td>
                <td colspan="2">0.98</td>
                <td colspan="2">0.99</td>
                <td colspan="2">0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both</td>
                <td colspan="2">0.99</td>
                <td colspan="2">0.97</td>
                <td colspan="2">0.99</td>
                <td>0.98</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Presence of an advanced adenoma<sup>a</sup></td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.97</td>
                <td>0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Location of advanced adenoma</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>None</td>
                <td colspan="2">0.99</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.93</td>
                <td colspan="2">0.99</td>
                <td>0.97</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Presence of an SSL<sup>b</sup></td>
                <td colspan="2">0.98</td>
                <td colspan="2">1.00</td>
                <td>0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Location of SSL</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>None</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.99</td>
                <td colspan="2">0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only</td>
                <td colspan="2">0.96</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.99</td>
                <td>0.98</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.86</td>
                <td colspan="2">0.99</td>
                <td>0.92</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Presence of an advanced SSL<sup>c</sup></td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td>1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Location of advanced SSL</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>None</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Right colon only</td>
                <td colspan="2">0.90</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.99</td>
                <td>0.95</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Left colon only</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Both</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.50</td>
                <td colspan="2">0.99</td>
                <td>0.67</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Total number of adenomas</bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.99</td>
                <td colspan="2">1.00</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1-2</td>
                <td colspan="2">0.99</td>
                <td colspan="2">0.99</td>
                <td colspan="2">0.99</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3-4</td>
                <td colspan="2">0.98</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.98</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>5-10</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#62;10</td>
                <td colspan="2">N/A<sup>d</sup></td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Total number of SSLs</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.99</td>
                <td colspan="2">1.00</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1-2</td>
                <td colspan="2">0.98</td>
                <td colspan="2">1.00</td>
                <td colspan="2">0.98</td>
                <td>0.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3-4</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td colspan="2">1.00</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>5-10</td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
                <td>N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Advanced adenomas were defined as adenomas ≥1 cm in size or with pathological features such as high-grade dysplasia or villous features.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>SSL: sessile serrated lesion.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>Advanced sessile serrated lesions were defined as lesions ≥1 cm in size or with pathological features such as low or high-grade dysplasia.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>NLP Performance in Calculating Colonoscopy Quality Indicators</title>
        <p>The NLP pipeline assessed the ADR and SDR in the test data set as 46.8% (468/1000) and 6.4% (64/1000), respectively, whereas the mean ADR and SDR assessed by manual review were 47.2% (472/1000) and 6.5% (65/1000), respectively. The gold standard evaluation assessed these values as 47.5% (475/1000) and 6.6% (66/1000), respectively (<xref ref-type="table" rid="table3">Table 3</xref>). The differences in assessed ADR and SDR between the manual review, the NLP pipeline, and the gold standard values were not significant. For assessing the number of patients assigned to each of the 6 surveillance interval groups described in the 2020 US Multi-Society Task Force guidelines, the NLP pipeline and manual review demonstrated similar performance; however, the NLP pipeline demonstrated a relatively higher accuracy in assessing the number of patients assigned to the 3-year group than the manual review (63/63, 100% vs 59/63, 93.6%, respectively); this was also true for the 3-5-year group (68/69, 98.6% vs 65/69, 94.2%, respectively). It is a complicated task to assess risk stratification in these groups.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of polyp detection rate and surveillance interval group assignment as assessed by manual review and the natural language processing pipeline in the test data set (N=1000).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="120"/>
            <col width="0"/>
            <col width="80"/>
            <col width="0"/>
            <col width="70"/>
            <col width="0"/>
            <col width="70"/>
            <col width="0"/>
            <col width="70"/>
            <col width="0"/>
            <col width="70"/>
            <col width="0"/>
            <col width="0"/>
            <col width="170"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="0"/>
            <col width="80"/>
            <thead>
              <tr valign="bottom">
                <td colspan="3">Extracted indicators</td>
                <td colspan="11">Human annotator</td>
                <td colspan="7">Method</td>
                <td><italic>P</italic> value<sup>a</sup>
                </td>
              </tr>
              <tr valign="bottom">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">A</td>
                <td colspan="2">B</td>
                <td colspan="2">C</td>
                <td colspan="2">D</td>
                <td colspan="2">E</td>
                <td colspan="3">Manual review<sup>b</sup></td>
                <td colspan="2">NLP system</td>
                <td colspan="2">Gold standard<sup>c</sup></td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="22">
                  <bold>Detection rate, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ADR<sup>d</sup></td>
                <td colspan="2">467  <break/>  
            (46.7)</td>
                <td colspan="2">474  <break/>  
            (47.4)</td>
                <td colspan="2">474  <break/>  
            (47.4)</td>
                <td colspan="2">475  <break/>  
            (47.5)</td>
                <td colspan="2">468  <break/>  
            (46.8)</td>
                <td colspan="3">472  <break/>  
            (47.2)</td>
                <td colspan="2">468  <break/>  
            (46.8)</td>
                <td colspan="2">475  <break/>  
            (47.5)</td>
                <td colspan="3">.92</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SDR<sup>e</sup></td>
                <td colspan="2">65  <break/>  
            (6.5)</td>
                <td colspan="2">64  <break/>  
            (6.4)</td>
                <td colspan="2">66  <break/>  
            (6.6)</td>
                <td colspan="2">64  <break/>  
            (6.4)</td>
                <td colspan="2">64  <break/>  
            (6.4)</td>
                <td colspan="3">65  <break/>  
            (6.5)</td>
                <td colspan="2">64  <break/>  
            (6.4)</td>
                <td colspan="2">66  <break/>  
            (6.6)</td>
                <td colspan="3">.99</td>
              </tr>
              <tr valign="top">
                <td colspan="22">
                  <bold>Surveillance interval group, n (%)</bold>
                </td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>1 year</td>
                <td colspan="2">N/A<sup>f</sup></td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
                <td colspan="3">N/A</td>
                <td colspan="2">N/A</td>
                <td colspan="2">N/A</td>
                <td colspan="3">N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3 years</td>
                <td colspan="2">59  <break/>  
            (93.7)</td>
                <td colspan="2">58  <break/>  
            (92.1)</td>
                <td colspan="2">60  <break/>  
            (95.2)</td>
                <td colspan="2">62  <break/>  
            (98.4)</td>
                <td colspan="2">58  <break/>  
            (92.1)</td>
                <td colspan="3">59  <break/>  
            (93.6)</td>
                <td colspan="2">63  <break/>  
            (100)</td>
                <td colspan="2">63  <break/>  
            (100)</td>
                <td colspan="3">.92</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3-5 years</td>
                <td colspan="2">62  <break/>  
            (89.9)</td>
                <td colspan="2">67  <break/>  
            (97.1)</td>
                <td colspan="2">64  <break/>  
            (92.8)</td>
                <td colspan="2">63  <break/>  
            (91.3)</td>
                <td colspan="2">68  <break/>  
            (98.6)</td>
                <td colspan="3">65  <break/>  
            (93.9)</td>
                <td colspan="2">68  <break/>  
            (98.6)</td>
                <td colspan="2">69  <break/>  
            (100)</td>
                <td colspan="3">.92</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>5-10 years</td>
                <td colspan="2">40  <break/>  
            (100)</td>
                <td colspan="2">40  <break/>  
            (100)</td>
                <td colspan="2">40  <break/>  
            (100)</td>
                <td colspan="2">40  <break/>  
            (100)</td>
                <td colspan="2">40  <break/>  
            (100)</td>
                <td colspan="3">40  <break/>  
            (100)</td>
                <td colspan="2">39  <break/>  
            (97.5)</td>
                <td colspan="2">40  <break/>  
            (100)</td>
                <td colspan="3">.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>7-10 years</td>
                <td colspan="2">339  <break/>  
            (97.7)</td>
                <td colspan="2">347  <break/>  
            (100)</td>
                <td colspan="2">345  <break/>  
            (99.4)</td>
                <td colspan="2">345  <break/>  
            (99.4)</td>
                <td colspan="2">346  <break/>  
            (99.7)</td>
                <td colspan="3">344  <break/>  
            (99.1)</td>
                <td colspan="2">343  <break/>  
            (98.9)</td>
                <td colspan="2">347  <break/>  
            (100)</td>
                <td colspan="3">.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>10 years</td>
                <td colspan="2">479  <break/>  
            (99.6)</td>
                <td colspan="2">480  <break/>  
            (99.8)</td>
                <td colspan="2">481  <break/>  
            (100)</td>
                <td colspan="2">480  <break/>  
            (99.8)</td>
                <td colspan="2">480  <break/>  
            (99.8)</td>
                <td colspan="3">480  <break/>  
            (99.8)</td>
                <td colspan="2">480  <break/>  
            (99.8)</td>
                <td colspan="2">481  <break/>  
            (100)</td>
                <td colspan="3">.99</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup><italic>P</italic> values were calculated using the 2×3 chi-square test.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Mean of the judgments made by the 5 human annotators.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>Consensus judgment of the 5 human annotators; applied in inconsistent cases.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>ADR: adenoma detection rate.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>SDR: sessile serrated lesion detection rate.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>N/A: not applicable (no patients were assigned a 1-year surveillance interval).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Analysis of ADR, SDR, and Surveillance Intervals in a 10-Year Colonoscopy Report Data Set</title>
        <p>The NLP pipeline was applied to a set of 54,562 colonoscopy reports (and their associated pathology reports) created by 25 endoscopists who examined patients aged ≥50 years over a 10-year period; the NLP pipeline analyzed ADR, SDR, and surveillance intervals in the reports (<xref ref-type="table" rid="table4">Table 4</xref>). The overall ADR, advanced ADR, SDR, and advanced SDR were 42% (22,909/54,562), 3.4% (1838/54,562), 3.3% (1806/54,562), and 0.5% (248/54,562), respectively. The difference in detection rate between the endoscopists with the highest and lowest performance was 39.9% (1055/1876, 56.2% vs 264/1615, 16.3%, respectively) for ADR, 5.3% (83/1165, 7.1% vs 30/1615, 1.9%, respectively) for advanced ADR, 6.2% (124/1876, 6.6% vs 6/1615, 0.4%, respectively) for SDR, and 1.6% (11/679, 1.6% vs 0/1615, 0%, respectively) for advanced SDR. Overall, the mean surveillance interval was 8.7 years, and the difference in the surveillance interval assigned by endoscopists with the highest and lowest performance was 1.3 years (9.5 years vs 8.2 years). <xref ref-type="table" rid="table5">Table 5</xref> shows the proportion of patients assigned to each of the 6 surveillance interval groups by groups of endoscopists divided according to the endoscopists’ ADR and SDR. The group of endoscopists with the lowest ADR (&#60;30%) assigned a higher proportion of patients to the longest surveillance interval than did the endoscopists with the highest ADR (&#62;45%). This pattern was similar for the endoscopists with the highest and lowest SDR.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Clinical application of the natural language processing pipeline to nonannotated colonoscopy data created by 25 endoscopists between 2010 and 2019.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="170"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Endoscopist</td>
                <td>Procedures</td>
                <td>Adenoma detection rate, n (%)</td>
                <td>Advanced adenoma detection rate, n (%)</td>
                <td>Sessile serrated lesion detection rate, n (%)</td>
                <td>Advanced sessile serrated lesion detection rate, n (%)</td>
                <td>Mean surveillance interval, years</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>A</td>
                <td>3060</td>
                <td>1112 (36.3)</td>
                <td>94 (3.1)</td>
                <td>58 (1.9)</td>
                <td>8 (0.3)</td>
                <td>8.9</td>
              </tr>
              <tr valign="top">
                <td>B</td>
                <td>981</td>
                <td>343 (35)</td>
                <td>36 (3.7)</td>
                <td>8 (0.8)</td>
                <td>0 (0)</td>
                <td>9.0</td>
              </tr>
              <tr valign="top">
                <td>C</td>
                <td>3553</td>
                <td>1447 (40.7)</td>
                <td>129 (3.6)</td>
                <td>91 (2.6)</td>
                <td>21 (0.6)</td>
                <td>8.8</td>
              </tr>
              <tr valign="top">
                <td>D</td>
                <td>2765</td>
                <td>1109 (40.1)</td>
                <td>92 (3.3)</td>
                <td>83 (3)</td>
                <td>17 (0.6)</td>
                <td>8.8</td>
              </tr>
              <tr valign="top">
                <td>E</td>
                <td>1174</td>
                <td>469 (39.9)</td>
                <td>46 (3.9)</td>
                <td>18 (1.5)</td>
                <td>3 (0.3)</td>
                <td>8.9</td>
              </tr>
              <tr valign="top">
                <td>F</td>
                <td>1258</td>
                <td>338 (26.9)</td>
                <td>39 (3.1)</td>
                <td>21 (1.7)</td>
                <td>1 (0.1)</td>
                <td>9.2</td>
              </tr>
              <tr valign="top">
                <td>G</td>
                <td>679</td>
                <td>301 (44.3)</td>
                <td>12 (1.8)</td>
                <td>40 (5.9)</td>
                <td>11 (1.6)</td>
                <td>8.6</td>
              </tr>
              <tr valign="top">
                <td>H</td>
                <td>1165</td>
                <td>505 (43.3)</td>
                <td>83 (7.1)</td>
                <td>21 (1.8)</td>
                <td>4 (0.3)</td>
                <td>8.4</td>
              </tr>
              <tr valign="top">
                <td>I</td>
                <td>1615</td>
                <td>264 (16.3)</td>
                <td>30 (1.9)</td>
                <td>6 (0.4)</td>
                <td>0 (0)</td>
                <td>9.5</td>
              </tr>
              <tr valign="top">
                <td>J</td>
                <td>2091</td>
                <td>917 (43.9)</td>
                <td>43 (2.1)</td>
                <td>92 (4.4)</td>
                <td>12 (0.6)</td>
                <td>8.7</td>
              </tr>
              <tr valign="top">
                <td>K</td>
                <td>1876</td>
                <td>1055 (56.2)</td>
                <td>58 (3.1)</td>
                <td>124 (6.6)</td>
                <td>16 (0.9)</td>
                <td>8.2</td>
              </tr>
              <tr valign="top">
                <td>L</td>
                <td>3284</td>
                <td>1739 (53)</td>
                <td>73 (2.2)</td>
                <td>144 (4.4)</td>
                <td>14 (0.4)</td>
                <td>8.4</td>
              </tr>
              <tr valign="top">
                <td>M</td>
                <td>3437</td>
                <td>1510 (43.9)</td>
                <td>116 (3.4)</td>
                <td>132 (3.8)</td>
                <td>3 (0.1)</td>
                <td>8.6</td>
              </tr>
              <tr valign="top">
                <td>N</td>
                <td>3799</td>
                <td>1708 (45)</td>
                <td>119 (3.1)</td>
                <td>130 (3.4)</td>
                <td>13 (0.3)</td>
                <td>8.6</td>
              </tr>
              <tr valign="top">
                <td>O</td>
                <td>647</td>
                <td>292 (45.1)</td>
                <td>14 (2.2)</td>
                <td>14 (2.2)</td>
                <td>1 (0.2)</td>
                <td>8.8</td>
              </tr>
              <tr valign="top">
                <td>P</td>
                <td>1707</td>
                <td>844 (49.4)</td>
                <td>74 (4.3)</td>
                <td>87 (5.1)</td>
                <td>16 (0.9)</td>
                <td>8.4</td>
              </tr>
              <tr valign="top">
                <td>Q</td>
                <td>2964</td>
                <td>1435 (48.4)</td>
                <td>106 (3.6)</td>
                <td>137 (4.6)</td>
                <td>16 (0.5)</td>
                <td>8.5</td>
              </tr>
              <tr valign="top">
                <td>R</td>
                <td>3209</td>
                <td>1235 (38.5)</td>
                <td>108 (3.4)</td>
                <td>99 (3.1)</td>
                <td>12 (0.4)</td>
                <td>8.8</td>
              </tr>
              <tr valign="top">
                <td>S</td>
                <td>2168</td>
                <td>816 (37.6)</td>
                <td>52 (2.4)</td>
                <td>61 (2.8)</td>
                <td>8 (0.4)</td>
                <td>8.9</td>
              </tr>
              <tr valign="top">
                <td>T</td>
                <td>3834</td>
                <td>1633 (42.6)</td>
                <td>119 (3.1)</td>
                <td>152 (4)</td>
                <td>23 (0.6)</td>
                <td>8.7</td>
              </tr>
              <tr valign="top">
                <td>U</td>
                <td>3935</td>
                <td>1324 (33.6)</td>
                <td>127 (3.2)</td>
                <td>68 (1.7)</td>
                <td>9 (0.2)</td>
                <td>9.1</td>
              </tr>
              <tr valign="top">
                <td>V</td>
                <td>1936</td>
                <td>1014 (52.4)</td>
                <td>114 (5.9)</td>
                <td>104 (5.4)</td>
                <td>17 (0.9)</td>
                <td>8.2</td>
              </tr>
              <tr valign="top">
                <td>W</td>
                <td>643</td>
                <td>268 (41.7)</td>
                <td>33 (5.1)</td>
                <td>4 (0.6)</td>
                <td>0 (0)</td>
                <td>8.8</td>
              </tr>
              <tr valign="top">
                <td>X</td>
                <td>1469</td>
                <td>680 (46.3)</td>
                <td>65 (4.4)</td>
                <td>73 (5)</td>
                <td>16 (1.1)</td>
                <td>8.5</td>
              </tr>
              <tr valign="top">
                <td>Y</td>
                <td>1313</td>
                <td>551 (42)</td>
                <td>56 (4.3)</td>
                <td>39 (3)</td>
                <td>7 (0.5)</td>
                <td>8.7</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>54,562</td>
                <td>22,909 (42)</td>
                <td>1838 (3.4)</td>
                <td>1806 (3.3)</td>
                <td>248 (0.5)</td>
                <td>8.7</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Proportion of patients assigned different surveillance intervals, sorted by endoscopists (N=25) with high, medium, and low adenoma detection rates and sessile serrated lesion detection rates.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="110"/>
            <col width="130"/>
            <col width="160"/>
            <col width="160"/>
            <col width="0"/>
            <col width="140"/>
            <col width="160"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Surveillance interval</td>
                <td colspan="4">Adenoma detection rate, n (%)</td>
                <td colspan="3">Sessile serrated lesion detection rate, n (%)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;30%<break/>(n=2873)</td>
                <td>30%-45%<break/>(n=37,806)</td>
                <td>&#62;45%<break/>(n=13,883)</td>
                <td colspan="2">&#60;2%<break/>(n=13,831)</td>
                <td>2%-4%<break/>(n=24,725)</td>
                <td>&#62;4%<break/>(n=16,006)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1 year</td>
                <td>0 (0)</td>
                <td>14 (0.04)</td>
                <td>13 (0.09)</td>
                <td colspan="2">3 (0.02)</td>
                <td>8 (0.03)</td>
                <td>16 (0.1)</td>
              </tr>
              <tr valign="top">
                <td>3 years</td>
                <td>77 (2.68)</td>
                <td>1918 (5.07)</td>
                <td>894 (6.44)</td>
                <td colspan="2">603 (4.36)</td>
                <td>1284 (5.19)</td>
                <td>1002 (6.26)</td>
              </tr>
              <tr valign="top">
                <td>3-5 years</td>
                <td>59 (2.05)</td>
                <td>2204 (5.83)</td>
                <td>1217 (8.77)</td>
                <td colspan="2">545 (3.94)</td>
                <td>1557 (6.3)</td>
                <td>1378 (8.61)</td>
              </tr>
              <tr valign="top">
                <td>5-10 years</td>
                <td>25 (0.87)</td>
                <td>670 (1.77)</td>
                <td>389 (2.80)</td>
                <td colspan="2">138 (1.00)</td>
                <td>491 (1.99)</td>
                <td>455 (2.84)</td>
              </tr>
              <tr valign="top">
                <td>7-10 years</td>
                <td>472 (16.43)</td>
                <td>11,213 (29.66)</td>
                <td>4953 (35.68)</td>
                <td colspan="2">3527 (25.5)</td>
                <td>7508 (30.37)</td>
                <td>5603 (35.01)</td>
              </tr>
              <tr valign="top">
                <td>10 years</td>
                <td>2231 (77.75)</td>
                <td>21,740 (57.5)</td>
                <td>6397 (46.08)</td>
                <td colspan="2">8988 (64.98)</td>
                <td>13,851 (56.02)</td>
                <td>7529 (47.04)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Comparison With Other NLP Systems</title>
        <p>There have been various efforts to develop NLP systems for monitoring the quality of colonoscopies in Western countries, and these have shown excellent performance in measuring procedure indications, cecal intubation rate, and the presence and location of polyps. NLP systems have been studied that have various levels of complexity and perform various tasks, ranging from simple extraction tasks, such as assessing the presence and location of polyps, to the automated extraction and calculation of quality metrics [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. However, Western-developed NLP systems in previous studies were based on reports written in English and used NLP lexicons from common language systems, such as the Unified Medical Language System and the Systematized Nomenclature of Medicine-Clinical Terms. These systems cannot be applied to a set of reports written variously in Korean only, in both Korean and English, or in English only, such as the one examined in this study. Therefore, for the first time in Korea, we developed an NLP pipeline to process colonoscopy reports written in multiple languages. A lexicon including Korean and English medical terms and various endoscopic abbreviations was used to construct the NLP pipeline. Hence, our NLP pipeline processed reports with feasible performance in the validation data set for capturing key quality indicators, including the detection rate for SSLs (previous NLP systems have only captured a few SSLs).</p>
        <p>We demonstrated the clinical application of the NLP pipeline with a 10-year set of nonannotated colonoscopy reports. Quality indicators, including ADR, SDR, and surveillance intervals, were extracted from reports written by 25 gastroenterologists, and the proportion of patients assigned different surveillance intervals was analyzed to determine the quality of polyp detection by the endoscopists. We found that ADR and SDR had great variance among the endoscopists, a result that is in line with previous studies [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. There was a 3.4-fold variation in ADR between the endoscopists with the highest and lowest levels (1055/1876, 56.2% vs 264/1615, 16.3%, respectively) and a 16.5-fold variation in SDR (124/1876, 6.6% vs 30/1615, 0.4%, respectively).</p>
      </sec>
      <sec>
        <title>Importance of SSL Detection and Performance Feedback</title>
        <p>Although awareness of the clinical importance of SSLs for colorectal cancer via the serrated pathway has increased since 2010, our data revealed that detecting SSLs remains a challenge for endoscopists performing screening colonoscopies. SSLs typically show a subtle endoscopic appearance: they can be flat, mucus-coated, and have indistinct borders, which is a totally different appearance from conventional adenomas [<xref ref-type="bibr" rid="ref32">32</xref>]. Most recently, Lee et al [<xref ref-type="bibr" rid="ref3">3</xref>] reported the results of a 1-year educational intervention based on a computerized training module that imparted knowledge on the appearance of SSLs using the NICE (Narrow Band Imaging International Colorectal Endoscopic) and WASP (Workgroup on Serrated Polyps and Polyposis) classifications. In this large study, which included 15 experienced endoscopists, the SDR improved significantly, from 4.5% at baseline to 7.1%. Therefore, implementing an NLP system for colonoscopies in clinical practice could provide feedback on the detection performance of individual endoscopists in real time and motivate endoscopists to improve their knowledge and observation techniques for difficult polyps.</p>
      </sec>
      <sec>
        <title>Optimization of Surveillance Interval Recommendations</title>
        <p>Current surveillance interval recommendations for follow-up colonoscopies do not consider the performance of the physician and only consider the characteristics of the removed polyp. Our study reveals that the recommended surveillance interval can be incorrectly long, depending on the performance level of the endoscopist. High-performance endoscopists (ADR &#62;45%) recommended a 10-year surveillance interval in 46.1% of patients (6397/13,883), while low-performance endoscopists (ADR &#60;30%) recommended a 10-year surveillance interval in 77.8% of patients (2231/2873). This wide difference in the proportion of patients that received a recommendation of a 10-year surveillance interval suggests that low-performance endoscopists missed polyps, negatively affecting their calculation of the future risk of patients and leading them to recommend an inappropriately long surveillance interval. Therefore, endoscopists should periodically check their own ability to detect neoplastic polyps and adjust their recommendations for surveillance interval according to their level of performance to prevent cancer development. Colonoscopy NLP systems could have a role in this self-evaluation process, providing an essential clinical decision support system and enabling the optimal choice of surveillance intervals by considering not only the risk of the patient, but also the performance of the endoscopist.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has the following limitations: First, it was conducted at a single center, leaving open the possibility that the NLP pipeline may not be able to properly process colonoscopy reports retrieved from other centers. As the NLP pipeline is based on regular expression rules formulated from linguistic patterns in the development data set, terms or patterns in other reports that are not present in the development data set can result in false processing of the reports. Second, the integrity of the NLP pipeline depends on the endoscopist’s documentation practice. For example, miswriting orders, numbers, or the count of the biopsied polyps could create mismatches between a colonoscopy report and its associated pathology report, resulting in false processing in the pipeline. However, this is not a problem unique to our study; it applies to all projects that use current NLP pipelines. Therefore, future research may be required to develop more confident NLP systems that warn of the possibility of false processing or to develop more sophisticated systems based on deep learning approaches and cutting-edge NLP models, such as bidirectional encoder representations from transformers (BERT) [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In summary, we developed an NLP pipeline to transform multi-language, free-text reports into a structured format to automate the calculation of quality indicators. The NLP pipeline processed the validation data set with high performance that was similar to a manual review performed by experts. The NLP-derived information from a 10-year real-world data set found that individual endoscopists showed great variance in quality indicators and patient risk stratification. This automated NLP process could be a useful decision support system for endoscopists, as it could allow the optimal recommendation of postcolonoscopy surveillance intervals based on both patient risk and endoscopist performance. This system could positively impact the quality of colonoscopy in many hospitals and health check-up centers that conduct screening programs. Furthermore, information extracted by NLP pipelines from big data derived from colonoscopy reports should be a valuable resource for research into the association of colon polyps with various diseases and into guideline adherence patterns.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADR</term>
          <def>
            <p>adenoma detection rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">SDR</term>
          <def>
            <p>sessile serrated lesion detection rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">SSL</term>
          <def>
            <p>sessile serrated lesion</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by National Research Foundation of Korea grants funded by the Korean government (grants 2019R1F1A1061665 and 2020R1F1A1068423).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>JHB contributed to conceptualization. SS, SYY, JYS, EHJ, GEC, SJC, HCK, and DUA contributed to data collection and material preparation. JHB contributed to the formal analysis. GS and JHB contributed to devising the methodology. GS and JHB wrote and prepared the original draft. HWH and SYY contributed to writing, reviewing, and editing. HWH and SYY were supervisors.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Senore</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Basu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Anttila</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ponti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tomatis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vale</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Ronco</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Soerjomataram</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Primic-Žakelj</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Riggi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Dillner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Elfström</surname>
              <given-names>Miriam Klara</given-names>
            </name>
            <name name-style="western">
              <surname>Lönnberg</surname>
              <given-names>Stefan</given-names>
            </name>
            <name name-style="western">
              <surname>Sankaranarayanan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Segnan</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Performance of colorectal cancer screening in the European Union Member States: data from the second European screening report</article-title>
          <source>Gut</source>
          <year>2019</year>
          <month>07</month>
          <day>10</day>
          <volume>68</volume>
          <issue>7</issue>
          <fpage>1232</fpage>
          <lpage>1244</lpage>
          <pub-id pub-id-type="doi">10.1136/gutjnl-2018-317293</pub-id>
          <pub-id pub-id-type="medline">30530530</pub-id>
          <pub-id pub-id-type="pii">gutjnl-2018-317293</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burr</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Derbyshire</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Whalley</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Finan</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rutter</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Valori</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>EJA</given-names>
            </name>
          </person-group>
          <article-title>Variation in post-colonoscopy colorectal cancer across colonoscopy providers in English National Health Service: population based cohort study</article-title>
          <source>BMJ</source>
          <year>2019</year>
          <month>11</month>
          <day>13</day>
          <volume>367</volume>
          <fpage>l6090</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=31722875"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.l6090</pub-id>
          <pub-id pub-id-type="medline">31722875</pub-id>
          <pub-id pub-id-type="pmcid">PMC6849511</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bae</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>HY</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kwak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Seo</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Yim</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>YM</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Impact of comprehensive optical diagnosis training using Workgroup serrAted polypS and Polyposis classification on detection of adenoma and sessile serrated lesion</article-title>
          <source>Dig Endosc</source>
          <year>2022</year>
          <month>01</month>
          <day>12</day>
          <volume>34</volume>
          <issue>1</issue>
          <fpage>180</fpage>
          <lpage>190</lpage>
          <pub-id pub-id-type="doi">10.1111/den.14046</pub-id>
          <pub-id pub-id-type="medline">34021513</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hetzel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Coukos</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Omstead</surname>
              <given-names>Kelsey</given-names>
            </name>
            <name name-style="western">
              <surname>Cerda</surname>
              <given-names>Sandra R</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Shi</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>Michael J</given-names>
            </name>
            <name name-style="western">
              <surname>Farraye</surname>
              <given-names>Francis A</given-names>
            </name>
          </person-group>
          <article-title>Variation in the detection of serrated polyps in an average risk colorectal cancer screening cohort</article-title>
          <source>Am J Gastroenterol</source>
          <year>2010</year>
          <month>12</month>
          <volume>105</volume>
          <issue>12</issue>
          <fpage>2656</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1038/ajg.2010.315</pub-id>
          <pub-id pub-id-type="medline">20717107</pub-id>
          <pub-id pub-id-type="pii">ajg2010315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rex</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Schoenfeld</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pike</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Fennerty</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Lieb</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Rizk</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Sawhney</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Shaheen</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberg</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>Quality indicators for colonoscopy</article-title>
          <source>Gastrointest Endosc</source>
          <year>2015</year>
          <month>01</month>
          <volume>81</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1016/j.gie.2014.07.058</pub-id>
          <pub-id pub-id-type="medline">25480100</pub-id>
          <pub-id pub-id-type="pii">S0016-5107(14)02051-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Butterly</surname>
              <given-names>LF</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Providing data for serrated polyp detection rate benchmarks: an analysis of the New Hampshire Colonoscopy Registry</article-title>
          <source>Gastrointest Endosc</source>
          <year>2017</year>
          <month>06</month>
          <volume>85</volume>
          <issue>6</issue>
          <fpage>1188</fpage>
          <lpage>1194</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28153571"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.gie.2017.01.020</pub-id>
          <pub-id pub-id-type="medline">28153571</pub-id>
          <pub-id pub-id-type="pii">S0016-5107(17)30063-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC5438272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corley</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Doubeni</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Zauber</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>de Boer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fireman</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Schottinger</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>VP</given-names>
            </name>
            <name name-style="western">
              <surname>Ghai</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Quesenberry</surname>
              <given-names>CP</given-names>
            </name>
          </person-group>
          <article-title>Adenoma detection rate and risk of colorectal cancer and death</article-title>
          <source>N Engl J Med</source>
          <year>2014</year>
          <month>04</month>
          <day>03</day>
          <volume>370</volume>
          <issue>14</issue>
          <fpage>1298</fpage>
          <lpage>306</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24693890"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1309086</pub-id>
          <pub-id pub-id-type="medline">24693890</pub-id>
          <pub-id pub-id-type="pmcid">PMC4036494</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lieberman</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Rex</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Winawer</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Giardiello</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>TR</given-names>
            </name>
          </person-group>
          <article-title>Guidelines for colonoscopy surveillance after screening and polypectomy: a consensus update by the US Multi-Society Task Force on Colorectal Cancer</article-title>
          <source>Gastroenterology</source>
          <year>2012</year>
          <month>09</month>
          <volume>143</volume>
          <issue>3</issue>
          <fpage>844</fpage>
          <lpage>857</lpage>
          <pub-id pub-id-type="doi">10.1053/j.gastro.2012.06.001</pub-id>
          <pub-id pub-id-type="medline">22763141</pub-id>
          <pub-id pub-id-type="pii">S0016-5085(12)00812-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lieberman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nadel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Atkin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Duggirala</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Fletcher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Glick</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Pope</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Potter</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Ransohoff</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rex</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schoen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schroy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Winawer</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Standardized colonoscopy reporting and data system: report of the Quality Assurance Task Group of the National Colorectal Cancer Roundtable</article-title>
          <source>Gastrointest Endosc</source>
          <year>2007</year>
          <month>05</month>
          <volume>65</volume>
          <issue>6</issue>
          <fpage>757</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1016/j.gie.2006.12.055</pub-id>
          <pub-id pub-id-type="medline">17466195</pub-id>
          <pub-id pub-id-type="pii">S0016-5107(07)00003-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdul-Baki</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schoen</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Leffler</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Kuganeswaran</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carrell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrotra</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Public reporting of colonoscopy quality is associated with an increase in endoscopist adenoma detection rate</article-title>
          <source>Gastrointest Endosc</source>
          <year>2015</year>
          <month>10</month>
          <volume>82</volume>
          <issue>4</issue>
          <fpage>676</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26385276"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.gie.2014.12.058</pub-id>
          <pub-id pub-id-type="medline">26385276</pub-id>
          <pub-id pub-id-type="pii">S0016-5107(14)02629-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4575767</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sey</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Asfaha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Siebring</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Jairath</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Performance report cards increase adenoma detection rate</article-title>
          <source>Endosc Int Open</source>
          <year>2017</year>
          <month>07</month>
          <day>06</day>
          <volume>5</volume>
          <issue>7</issue>
          <fpage>E675</fpage>
          <lpage>E682</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.1055/s-0043-110568"/>
          </comment>
          <pub-id pub-id-type="doi">10.1055/s-0043-110568</pub-id>
          <pub-id pub-id-type="medline">28691053</pub-id>
          <pub-id pub-id-type="pmcid">PMC5500116</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meystre</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Hurdle</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>Extracting information from textual documents in the electronic health record: a review of recent research</article-title>
          <source>Yearb Med Inform</source>
          <year>2008</year>
          <fpage>128</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="medline">18660887</pub-id>
          <pub-id pub-id-type="pii">me08010128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pons</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>LMM</given-names>
            </name>
            <name name-style="western">
              <surname>Hunink</surname>
              <given-names>MGM</given-names>
            </name>
            <name name-style="western">
              <surname>Kors</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Natural Language Processing in Radiology: A Systematic Review</article-title>
          <source>Radiology</source>
          <year>2016</year>
          <month>05</month>
          <volume>279</volume>
          <issue>2</issue>
          <fpage>329</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.1148/radiol.16142770</pub-id>
          <pub-id pub-id-type="medline">27089187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nehme</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Evolving Role and Future Directions of Natural Language Processing in Gastroenterology</article-title>
          <source>Dig Dis Sci</source>
          <year>2021</year>
          <month>01</month>
          <day>27</day>
          <volume>66</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1007/s10620-020-06156-y</pub-id>
          <pub-id pub-id-type="medline">32107677</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10620-020-06156-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ridgway</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Uvin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schmitt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Oliwa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Almirol</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Natural Language Processing of Clinical Notes to Identify Mental Illness and Substance Use Among People Living with HIV: Retrospective Cohort Study</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>03</month>
          <day>10</day>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e23456</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/3/e23456/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23456</pub-id>
          <pub-id pub-id-type="medline">33688848</pub-id>
          <pub-id pub-id-type="pii">v9i3e23456</pub-id>
          <pub-id pub-id-type="pmcid">PMC7991991</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arnaud</surname>
              <given-names>É</given-names>
            </name>
            <name name-style="western">
              <surname>Elbattah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gignon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dequen</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep Learning to Predict Hospitalization at Triage: Integration of Structured Data and Unstructured Text</article-title>
          <year>2021</year>
          <conf-name>IEEE International Conference on Big Data</conf-name>
          <conf-date>Dec 10-13, 2020</conf-date>
          <conf-loc>Atlanta, GA</conf-loc>
          <publisher-name>IEEE</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/abstract/document/9378073"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/BigData50022.2020.9378073</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Leonard Westgate</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gui</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>BV</given-names>
            </name>
            <name name-style="western">
              <surname>Shiner</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical mental health notes may add predictive value to existing suicide risk models</article-title>
          <source>Psychol Med</source>
          <year>2020</year>
          <month>02</month>
          <day>17</day>
          <volume>51</volume>
          <issue>8</issue>
          <fpage>1382</fpage>
          <lpage>1391</lpage>
          <pub-id pub-id-type="doi">10.1017/s0033291720000173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical information extraction applications: A literature review</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>01</month>
          <volume>77</volume>
          <fpage>34</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30256-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.11.011</pub-id>
          <pub-id pub-id-type="medline">29162496</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30256-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5771858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Rossum</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The Python Library Reference, release 3.7.10</article-title>
          <source>The Python Library Reference</source>
          <year>2020</year>
          <publisher-loc>Amsterdam, The Netherlands</publisher-loc>
          <publisher-name>Python Software Foundation</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>TO</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>DH</given-names>
            </name>
          </person-group>
          <source>Guide of gastroenterological endoscopy in clinical practice: Korean Society of Gastrointestinal Endoscopy</source>
          <year>2013</year>
          <month>07</month>
          <day>03</day>
          <publisher-loc>Seoul, Republic of Korea</publisher-loc>
          <publisher-name>대한의학서적</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lieberman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Dominitz</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Kaltenbach</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shaukat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Syngal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rex</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Recommendations for Follow-Up After Colonoscopy and Polypectomy: A Consensus Update by the US Multi-Society Task Force on Colorectal Cancer</article-title>
          <source>Am J Gastroenterol</source>
          <year>2020</year>
          <month>03</month>
          <day>07</day>
          <volume>115</volume>
          <issue>3</issue>
          <fpage>415</fpage>
          <lpage>434</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32039982"/>
          </comment>
          <pub-id pub-id-type="doi">10.14309/ajg.0000000000000544</pub-id>
          <pub-id pub-id-type="medline">32039982</pub-id>
          <pub-id pub-id-type="pii">00000434-202003000-00019</pub-id>
          <pub-id pub-id-type="pmcid">PMC7393611</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Virtanen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gommers</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Oliphant</surname>
              <given-names>TE</given-names>
            </name>
            <name name-style="western">
              <surname>Haberland</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Burovski</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weckesser</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bright</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van der Walt</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brett</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Millman</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mayorov</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>ARJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Polat</surname>
              <given-names>İ</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>VanderPlas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laxalde</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Perktold</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cimrman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Henriksen</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Quintero</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Archibald</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeiro</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>van Mulbregt</surname>
              <given-names>P</given-names>
            </name>
            <collab>SciPy 1.0 Contributors</collab>
          </person-group>
          <article-title>SciPy 1.0: fundamental algorithms for scientific computing in Python</article-title>
          <source>Nat Methods</source>
          <year>2020</year>
          <month>03</month>
          <day>03</day>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>261</fpage>
          <lpage>272</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32015543"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41592-019-0686-2</pub-id>
          <pub-id pub-id-type="medline">32015543</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41592-019-0686-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7056644</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deutsch</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Colonoscopy quality, quality measures, and a natural language processing tool for electronic health records</article-title>
          <source>Gastrointest Endosc</source>
          <year>2012</year>
          <month>06</month>
          <volume>75</volume>
          <issue>6</issue>
          <fpage>1240</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.1016/j.gie.2012.02.031</pub-id>
          <pub-id pub-id-type="medline">22624812</pub-id>
          <pub-id pub-id-type="pii">S0016-5107(12)00196-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gawron</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>Keswani</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>LV</given-names>
            </name>
            <name name-style="western">
              <surname>Kho</surname>
              <given-names>AN</given-names>
            </name>
          </person-group>
          <article-title>Anatomic and advanced adenoma detection rates as quality metrics determined via natural language processing</article-title>
          <source>Am J Gastroenterol</source>
          <year>2014</year>
          <month>12</month>
          <volume>109</volume>
          <issue>12</issue>
          <fpage>1844</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1038/ajg.2014.147</pub-id>
          <pub-id pub-id-type="medline">24935271</pub-id>
          <pub-id pub-id-type="pii">ajg2014147</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harkema</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Saul</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dellon</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Schoen</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrotra</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Developing a natural language processing application for measuring the quality of colonoscopy procedures</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <month>12</month>
          <volume>18 Suppl 1</volume>
          <fpage>i150</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21946240"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000431</pub-id>
          <pub-id pub-id-type="medline">21946240</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000431</pub-id>
          <pub-id pub-id-type="pmcid">PMC3241178</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imler</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Morea</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Imperiale</surname>
              <given-names>TF</given-names>
            </name>
          </person-group>
          <article-title>Clinical decision support with natural language processing facilitates determination of colonoscopy surveillance intervals</article-title>
          <source>Clin Gastroenterol Hepatol</source>
          <year>2014</year>
          <month>07</month>
          <volume>12</volume>
          <issue>7</issue>
          <fpage>1130</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cgh.2013.11.025</pub-id>
          <pub-id pub-id-type="medline">24316106</pub-id>
          <pub-id pub-id-type="pii">S1542-3565(13)01831-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imler</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Morea</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kahi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Imperiale</surname>
              <given-names>TF</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing accurately categorizes findings from colonoscopy and pathology reports</article-title>
          <source>Clin Gastroenterol Hepatol</source>
          <year>2013</year>
          <month>06</month>
          <volume>11</volume>
          <issue>6</issue>
          <fpage>689</fpage>
          <lpage>94</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23313839"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cgh.2012.11.035</pub-id>
          <pub-id pub-id-type="medline">23313839</pub-id>
          <pub-id pub-id-type="pii">S1542-3565(13)00010-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4026927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imler</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Morea</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kahi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sherer</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Cardwell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ahnen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Antaki</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ashley</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Baffy</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dominitz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Korsten</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nagar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Promrat</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Saini</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shergill</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smalley</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Imperiale</surname>
              <given-names>TF</given-names>
            </name>
          </person-group>
          <article-title>Multi-center colonoscopy quality measurement utilizing natural language processing</article-title>
          <source>Am J Gastroenterol</source>
          <year>2015</year>
          <month>04</month>
          <volume>110</volume>
          <issue>4</issue>
          <fpage>543</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1038/ajg.2015.51</pub-id>
          <pub-id pub-id-type="medline">25756240</pub-id>
          <pub-id pub-id-type="pii">ajg201551</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Zauber</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Doubeni</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>Corley</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Accurate Identification of Colonoscopy Quality and Polyp Findings Using Natural Language Processing</article-title>
          <source>J Clin Gastroenterol</source>
          <year>2019</year>
          <month>01</month>
          <volume>53</volume>
          <issue>1</issue>
          <fpage>e25</fpage>
          <lpage>e30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28906424"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MCG.0000000000000929</pub-id>
          <pub-id pub-id-type="medline">28906424</pub-id>
          <pub-id pub-id-type="pmcid">PMC5847417</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mehrotra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dellon</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Schoen</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Saul</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bishehsari</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Farmer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Harkema</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Applying a natural language processing tool to electronic health records to assess performance on colonoscopy quality measures</article-title>
          <source>Gastrointest Endosc</source>
          <year>2012</year>
          <month>06</month>
          <volume>75</volume>
          <issue>6</issue>
          <fpage>1233</fpage>
          <lpage>9.e14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22482913"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.gie.2012.01.045</pub-id>
          <pub-id pub-id-type="medline">22482913</pub-id>
          <pub-id pub-id-type="pii">S0016-5107(12)00104-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC3852911</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raju</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Lum</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Slack</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Thirumurthi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lynch</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Davila</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Bhutani</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Shafi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Bresalier</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Dekovich</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Guha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pande</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Blechacz</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rashid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Routbort</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shuttlesworth</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mishra</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stroehlein</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>WA</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing as an alternative to manual reporting of colonoscopy quality metrics</article-title>
          <source>Gastrointest Endosc</source>
          <year>2015</year>
          <month>09</month>
          <volume>82</volume>
          <issue>3</issue>
          <fpage>512</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25910665"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.gie.2015.01.049</pub-id>
          <pub-id pub-id-type="medline">25910665</pub-id>
          <pub-id pub-id-type="pii">S0016-5107(15)00100-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC4540652</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Musquer</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>IJspeert J, Bastiaansen B, Leerdam M, et al (2016) Development and validation of the WASP classification system for optical diagnosis of adenomas, hyperplastic polyps and sessile serrated adenomas/polyps. Gut 65:963–970</article-title>
          <source>Colon Rectum</source>
          <year>2018</year>
          <month>08</month>
          <day>21</day>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>200</fpage>
          <lpage>203</lpage>
          <pub-id pub-id-type="doi">10.3166/cer-2018-0029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source>
          <year>2019</year>
          <conf-name>2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics</conf-name>
          <conf-date>2-7 June 2019</conf-date>
          <conf-loc>Minneapolis, Minnesota</conf-loc>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
