<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i5e24381</article-id>
      <article-id pub-id-type="pmid">33944791</article-id>
      <article-id pub-id-type="doi">10.2196/24381</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Automating Stroke Data Extraction From Free-Text Radiology Reports Using Natural Language Processing: Instrument Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Dewey</surname>
            <given-names>Rebecca</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Aksoy</surname>
            <given-names>Mehmet</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Amy Y X</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Medicine (Neurology)</institution>
            <institution>University of Toronto – Sunnybrook Health Sciences Centre</institution>
            <addr-line>2075 Bayview Avenue</addr-line>
            <addr-line>Toronto, ON, M4N 3M5</addr-line>
            <country>Canada</country>
            <phone>1 416 480 6100 ext 4866</phone>
            <fax>1 416 480 5753</fax>
            <email>amyyx.yu@utoronto.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7276-9551</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Zhongyu A</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6324-1157</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Pou-Prom</surname>
            <given-names>Chloe</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5719-7786</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Lopes</surname>
            <given-names>Kaitlyn</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2030-7627</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Kapral</surname>
            <given-names>Moira K</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3444-9928</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Aviv</surname>
            <given-names>Richard I</given-names>
          </name>
          <degrees>MBChB</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0259-970X</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Mamdani</surname>
            <given-names>Muhammad</given-names>
          </name>
          <degrees>MA, MPH, PharmD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5199-6344</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Medicine (Neurology)</institution>
        <institution>University of Toronto – Sunnybrook Health Sciences Centre</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Unity Health Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Medicine (General Internal Medicine)</institution>
        <institution>University of Toronto – University Health Network</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Radiology</institution>
        <institution>Division of Neuroradiology</institution>
        <institution>University of Ottawa</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Medicine</institution>
        <institution>Unity Health Toronto</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Amy Y X Yu <email>amyyx.yu@utoronto.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>4</day>
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>5</issue>
      <elocation-id>e24381</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>9</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>28</day>
          <month>10</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>10</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>16</day>
          <month>4</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Amy Y X Yu, Zhongyu A Liu, Chloe Pou-Prom, Kaitlyn Lopes, Moira K Kapral, Richard I Aviv, Muhammad Mamdani. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 04.05.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/5/e24381" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Diagnostic neurovascular imaging data are important in stroke research, but obtaining these data typically requires laborious manual chart reviews.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to determine the accuracy of a natural language processing (NLP) approach to extract information on the presence and location of vascular occlusions as well as other stroke-related attributes based on free-text reports.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>From the full reports of 1320 consecutive computed tomography (CT), CT angiography, and CT perfusion scans of the head and neck performed at a tertiary stroke center between October 2017 and January 2019, we manually extracted data on the presence of proximal large vessel occlusion (primary outcome), as well as distal vessel occlusion, ischemia, hemorrhage, Alberta stroke program early CT score (ASPECTS), and collateral status (secondary outcomes). Reports were randomly split into training (n=921) and validation (n=399) sets, and attributes were extracted using rule-based NLP. We reported the sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), and the overall accuracy of the NLP approach relative to the manually extracted data.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The overall prevalence of large vessel occlusion was 12.2%. In the training sample, the NLP approach identified this attribute with an overall accuracy of 97.3% (95.5% sensitivity, 98.1% specificity, 84.1% PPV, and 99.4% NPV). In the validation set, the overall accuracy was 95.2% (90.0% sensitivity, 97.4% specificity, 76.3% PPV, and 98.5% NPV). The accuracy of identifying distal or basilar occlusion as well as hemorrhage was also high, but there were limitations in identifying cerebral ischemia, ASPECTS, and collateral status.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>NLP may improve the efficiency of large-scale imaging data collection for stroke surveillance and research.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>stroke</kwd>
        <kwd>diagnostic imaging</kwd>
        <kwd>data extraction</kwd>
        <kwd>natural language processing</kwd>
        <kwd>neurovascular</kwd>
        <kwd>imaging</kwd>
        <kwd>stroke surveillance</kwd>
        <kwd>surveillance</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Stroke is a leading cause of death and disability [<xref ref-type="bibr" rid="ref1">1</xref>]. Neuroimaging study findings inform treatment and prognosis. For example, recent clinical trials have demonstrated the efficacy of endovascular thrombectomy, a mechanical clot-retrieval procedure, in improving functional outcomes in patients with acute ischemic stroke and proximal large vessel occlusion [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Data on efficacy of this procedure in patients with distal or smaller vessel occlusion are currently lacking. Although large health administrative databases have information on whether a stroke was ischemic or hemorrhagic, detailed neuroimaging findings are usually found in narrative diagnostic imaging reports and obtained through resource-intensive manual chart abstractions [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>The lack of population-based neuroimaging data limits the ability to characterize the prevalence of large vessel occlusion. A recent meta-analysis of cohort studies of patients with ischemic stroke found that the prevalence of large vessel occlusion ranged widely, from 13% to 52% [<xref ref-type="bibr" rid="ref8">8</xref>], suggesting that smaller cohort studies can be vulnerable to selection bias. Therefore, automating the extraction of information on vessel occlusion from diagnostic imaging reports is needed for population-based disease surveillance and clinical research.</p>
      <p>Natural language processing (NLP) can convert large amounts of free-text data into structured data and has been used to extract information on stroke type and location from diagnostic imaging reports [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. However, its ability to characterize vascular occlusions is not well understood. We aimed to determine the accuracy of an NLP tool [<xref ref-type="bibr" rid="ref12">12</xref>] in identifying the presence and location of vascular occlusions and other stroke-related attributes from neuroimaging reports of computed tomography (CT), CT angiography (CTA), and CT perfusion (CTP) scans. We hypothesized that an NLP tool can identify large vessel occlusion with high accuracy.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Manual Chart Abstraction</title>
        <p>We obtained full free-text reports of 1320 consecutive stroke protocol imaging studies comprising CT, CTA, and CTP imaging of the head and neck performed between October 2017 and January 2019 at a university-affiliated comprehensive stroke center that provides consultation for endovascular thrombectomy to a catchment area of 2.5 million people. A stroke specialist and a trained research assistant manually extracted stroke-related attributes from the reports. The primary outcome was the presence of large vessel occlusion, defined as occlusion in the M1 segment of the middle cerebral artery (MCA-M1) or A1 segment of the anterior cerebral artery (ACA-A1) with or without involvement of the carotid terminus. We chose this as the primary outcome because patients with this type of occlusion can be treated with endovascular thrombectomy. Isolated intracranial internal carotid artery occlusion was not categorized as large vessel occlusion in this study because the effectiveness of endovascular thrombectomy has not been shown in this population [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
        <p>Secondary outcomes included (1) the presence of cerebral ischemia, (2) Alberta stroke program early CT score (ASPECTS) [<xref ref-type="bibr" rid="ref14">14</xref>], (3) the presence of any intracranial hemorrhage, (4) distal anterior circulation occlusion defined as occlusion in the middle or anterior cerebral arteries in the M2 or A2 segments or beyond, (5) basilar occlusion, and (6) qualitative measure of collateral status (ie, good, intermediate, or poor). The manually extracted data were considered the reference standard. Duplicate chart abstraction on 200 charts showed that the inter-rater reliability was &#62;96% for all attributes except for the presence of cerebral ischemia, for which it was 80%. We randomly split the reports into training (n=921) and validation (n=399) sets.</p>
      </sec>
      <sec>
        <title>CHARTextract NLP Tool</title>
        <p>NLP rule sets for stroke attribute extraction from free-text diagnostic imaging reports were created using CHARTextract version 0.3.2, freely available online [<xref ref-type="bibr" rid="ref12">12</xref>]. CHARTextract is a rule-based information extraction tool that relies on regular expressions and works at the sentence level to identify word patterns. We opted to use a rule-based approach due to the small sample size and the availability of domain experts to develop and refine the rules.</p>
        <p>We created information extraction pipelines by using an iterative process where each rule was assigned a weight by the end user in the training set. For example, if a report contains the text “presence of middle cerebral artery occlusion…,” the system’s estimate of the probability of a large vessel occlusion increases; however, if a report contains the text “no evidence of…,” the system’s estimate of the probability decreases. As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, the tool displays the discrepancies between the chart abstractor label and the tool’s prediction, thus allowing for rapid iterative refinement of the rules by the end user. Rules were developed for each attribute through an iterative process by the end users (ZL, AY, and CP) using the training set and were then validated in the validation set. For the presence of large vessel occlusion (our primary outcome), we also recorded whether the discrepancy between the chart abstractor and the NLP tool was due to abstractor or tool error. The rules thus developed are shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Example 1 of a discrepancy between the chart abstractor and CHARTextract tool output. (A) Computed tomography angiography scan showing loss of opacification in the left middle cerebral artery, involving the left M1 segment and extending into the M2 segment.
(B) CHARTextract tool output: the chart abstractor labeled that large vessel occlusion was present, but the CHARTextract tool determined this attribute to be absent. The rules were revised to reflect that occlusion involving the “M1 segment” should be considered a large vessel occlusion even if the terms “MCA” or “middle cerebral artery” were absent.</p>
          </caption>
          <graphic xlink:href="medinform_v9i5e24381_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Statistical Methods</title>
        <p>The stroke-related attributes identified by the NLP tool, CHARTextract version 0.3.2, were compared to the reference standard. The sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) were calculated using this tool.</p>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>The study was approved by the Sunnybrook Health Sciences Centre and Unity Health Toronto Research Ethics Boards with a waiver of individual patient consent prior to data collection.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>Among the 1320 consecutive diagnostic imaging reports manually reviewed, chart abstractors identified 184 large vessel occlusions (MCA-M1, n=157; ACA-A1, n=27) in 161 (12.2%) reports. Distal anterior circulation occlusion was reported in 188 (14.2%) scans, basilar artery occlusion in 26 (2.0%) scans, established ischemia in 391 (29.6%) scans, and intracranial hemorrhage in 139 (10.5%) scans. ASPECTS was reported only in 384 (29.1%) reports (ASPECTS &#60;5, n=40; ASPECTS ≥5, n=344), and collateral status was described in 216 (16.4%) reports (good, n=141; intermediate, n=26; poor, n=49).</p>
      <p>Compared to the reference standard, the NLP tool identified large vessel occlusion with an overall accuracy of 97.3% (95.5% sensitivity, 98.1% specificity, 84.1% PPV, and 99.4% NPV). Despite an iterative process to refine rules, some scenarios remained challenging to translate into rules. <xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates an example wherein the CHARTextract tool determined large vessel occlusion to be present because the words “occlusion” and “M1 segment” were detected in the same sentence, but the report indicated that the occlusion was in the cavernous portion of the internal carotid artery with reconstitution of blood flow in the M1 segment. In another example illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>, the CHARTextract tool determined that large vessel occlusion was absent because the report indicated the presence of an occlusion extending from the internal carotid artery to the M2 segment. Here, the tool only detected “internal carotid artery” and “M2” as keywords and could not interpret the vascular anatomy described in the report. Nevertheless, in the validation set, the overall accuracy for large vessel occlusion was still high at 95.2% (90.0% sensitivity, 97.4% specificity, 76.3% PPV, and 98.5% NPV). We also found that two of the 25 discrepancies between the abstractors and the NLP tool were due to chart abstractor error.</p>
      <fig id="figure2" position="float">
        <label>Figure 2</label>
        <caption>
          <p>Example 2 of a discrepancy between the chart abstractor and CHARTextract tool output. (A) Computed tomography angiography scan showing near-occlusion of the cavernous internal carotid artery with reconstitution of the middle cerebral artery. 
(B) CHARTextract output: the abstractor labeled that large vessel occlusion was absent, but the CHARTextract tool determined this attribute to be present because the words “occlusion” and “M1 segment” were detected in the same sentence.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e24381_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>Example 3 of a discrepancy between the chart abstractor and CHARTextract tool output. The abstractor labeled that large vessel occlusion was present because the abstractor was able to interpret that an occlusion from the internal carotid artery and extending to the M2 segment of the middle cerebral artery involves the M1 segment, but the CHARTextract tool determined this attribute to be absent because the tool detects key words without knowledge of vascular anatomy.</p>
        </caption>
        <graphic xlink:href="medinform_v9i5e24381_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>The accuracy of the CHARTextract tool for the other stroke attributes is presented in <xref ref-type="table" rid="table1">Table 1</xref>. The tool identified these other attributes with moderately high accuracy except for presence of established ischemia, which had a lower sensitivity and PPV of 82.2% and 80.5%, respectively, in the derivation cohort and 80.8% and 64.1%, respectively, in the validation cohort. The other exception was basilar occlusion, which was only present in 2.0% (26/1320) of the reports. Although the sensitivity and PPV for basilar occlusion were 100% and 95.0%, respectively, in the derivation cohort, the corresponding values were lower in the validation cohort (ie, 71.4% and 41.7%).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Accuracy of the natural language processing tool CHARTextract to identify stroke-related attributes in diagnostic imaging reports.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="240"/>
          <col width="130"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Cohort and stroke-related attribute</td>
              <td>Attribute prevalence, n (%)</td>
              <td>Sensitivity (%)</td>
              <td>Specificity (%)</td>
              <td>PPV<sup>a</sup> (%)</td>
              <td>NPV<sup>b</sup> (%)</td>
              <td>Overall accuracy (%)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="8">
                <bold>Derivation cohort (n=921)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td> Anterior proximal occlusion<break/>  <break/>  </td>
              <td>111 (12.1)</td>
              <td>95.5</td>
              <td>98.1</td>
              <td>84.1</td>
              <td>99.4</td>
              <td>97.3</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td> Anterior distal occlusion<break/>  <break/>  </td>
              <td>127 (13.8)</td>
              <td>92.9</td>
              <td>98.0</td>
              <td>88.1</td>
              <td>98.9</td>
              <td>97.3</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td> Basilar occlusion<break/>  <break/>  </td>
              <td>19 (2.1)</td>
              <td>100</td>
              <td>99.9</td>
              <td>95.0</td>
              <td>100</td>
              <td>99.9</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td> Presence of established ischemia</td>
              <td>287 (31.2)</td>
              <td>82.2</td>
              <td>91.7</td>
              <td>80.5</td>
              <td>91.9</td>
              <td>88.3</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Presence of any hemorrhage</td>
              <td>114 (12.4)</td>
              <td>93.0</td>
              <td>98.2</td>
              <td>87.6</td>
              <td>99.0</td>
              <td>97.5</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Validation cohort (n=399)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Anterior proximal occlusion<break/>  <break/>  </td>
              <td>50 (12.5)</td>
              <td>90.0</td>
              <td>97.4</td>
              <td>76.3</td>
              <td>98.5</td>
              <td>95.2</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Anterior distal occlusion<break/>  <break/>  </td>
              <td>61 (15.3)</td>
              <td>83.6</td>
              <td>97.7</td>
              <td>86.4</td>
              <td>97.1</td>
              <td>95.5</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Basilar occlusion<break/>  <break/>  </td>
              <td>7 (1.8)</td>
              <td>71.4</td>
              <td>98.2</td>
              <td>41.7</td>
              <td>99.5</td>
              <td>97.7</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Presence of established ischemia</td>
              <td>104 (26.1)</td>
              <td>80.8</td>
              <td>85.1</td>
              <td>64.1</td>
              <td>92.5</td>
              <td>83.2</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Presence of any hemorrhage<break/>  <break/>  </td>
              <td>25 (6.3)</td>
              <td>88.0</td>
              <td>96.0</td>
              <td>59.5</td>
              <td>99.2</td>
              <td>95.5</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>PPV: positive predictive value.</p>
          </fn>
          <fn id="table1fn2">
            <p><sup>b</sup>NPV: negative predictive value.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p>The metrics for ASPECTS and collateral status are shown separately because data were incomplete (<xref ref-type="table" rid="table2">Table 2</xref>). Importantly, we found that the NLP tool was able to identify the reports with missing data with high accuracy. For example, information on ASPECTS was absent in 71.8% (661/921) of the reports in the derivation cohort and 68.9% (275/399) of the reports in the validation cohort. The tool accurately identified that this attribute was missing with a sensitivity and PPV of 99.7% and 99.7%, respectively, in the derivation cohort and 99.3% and 98.6%, respectively, in the validation cohort.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Accuracy of the natural language processing tool CHARTextract to identify Alberta stroke program early CT score (ASPECTS) and collateral vascular status based on diagnostic imaging reports.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="30"/>
          <col width="260"/>
          <col width="140"/>
          <col width="0"/>
          <col width="120"/>
          <col width="120"/>
          <col width="100"/>
          <col width="100"/>
          <col width="0"/>
          <col width="100"/>
          <thead>
            <tr valign="top">
              <td colspan="3">Cohort and stroke-related attributes</td>
              <td>Attribute prevalence, n (%)</td>
              <td colspan="2">Sensitivity (%)</td>
              <td>Specificity (%)</td>
              <td>PPV<sup>a</sup> (%)</td>
              <td>NPV<sup>b</sup> (%)</td>
              <td colspan="2">Overall accuracy (%)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="11">
                <bold>Derivation cohort (n=921)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="9">
                <bold>ASPECTS</bold>
              </td>
              <td>98.8</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Not reported</td>
              <td colspan="2">661 (71.8)</td>
              <td>99.7</td>
              <td>99.2</td>
              <td>99.7</td>
              <td>99.2</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>&#60;5</td>
              <td colspan="2">30 (3.3)</td>
              <td>96.7</td>
              <td>99.2</td>
              <td>80.6</td>
              <td>99.9</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>≥5</td>
              <td colspan="2">230 (25.0)</td>
              <td>96.5</td>
              <td>99.7</td>
              <td>99.1</td>
              <td>98.9</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="9">
                <bold>Collateral status</bold>
              </td>
              <td>98.4</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Not reported</td>
              <td colspan="2">774 (84.0)</td>
              <td>99.2</td>
              <td>96.6</td>
              <td>99.4</td>
              <td>95.9</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Poor</td>
              <td colspan="2">34 (3.7)</td>
              <td>94.1</td>
              <td>100</td>
              <td>100</td>
              <td>99.8</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Intermediate</td>
              <td colspan="2">19 (2.1)</td>
              <td>78.9</td>
              <td>100</td>
              <td>100</td>
              <td>99.6</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Good</td>
              <td colspan="2">94 (10.2)</td>
              <td>96.8</td>
              <td>98.8</td>
              <td>90.1</td>
              <td>99.6</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="11">
                <bold>Validation cohort (n=399)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="9">
                <bold>ASPECTS</bold>
              </td>
              <td>98.5</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Not reported</td>
              <td colspan="2">275 (68.9)</td>
              <td>99.3</td>
              <td>96.8</td>
              <td>98.6</td>
              <td>98.4</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>&lt;5</td>
              <td colspan="2">10 (2.5)</td>
              <td>70.0</td>
              <td>100</td>
              <td>100</td>
              <td>99.2</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>≥5</td>
              <td colspan="2">114 (28.6)</td>
              <td>99.1</td>
              <td>99.3</td>
              <td>98.3</td>
              <td>99.6</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td colspan="9">
                <bold>Collateral status</bold>
              </td>
              <td>98.2</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Not reported</td>
              <td colspan="2">330 (82.7)</td>
              <td>99.7</td>
              <td>91.3</td>
              <td>98.2</td>
              <td>98.4</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Poor</td>
              <td colspan="2">15 (3.8)</td>
              <td>93.3</td>
              <td>99.7</td>
              <td>93.3</td>
              <td>99.7</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Intermediate</td>
              <td colspan="2">7 (1.8)</td>
              <td>71.4</td>
              <td>100</td>
              <td>100</td>
              <td>99.5</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Good</td>
              <td colspan="2">47 (11.8)</td>
              <td>93.6</td>
              <td>100</td>
              <td>100</td>
              <td>99.2</td>
              <td colspan="2">
                <break/>
              </td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>PPV: positive predictive value.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>NPV: negative predictive value.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We showed that an NLP approach can automate data extraction from neuroimaging reports with moderately high accuracy, supporting its potential application for stroke surveillance, health system planning, and population-based clinical research. The PPV of CHARTextract to identify large vessel occlusion was 76.3%, meaning that of 100 reports identified to have a large vessel occlusion, there were 24 false-positive cases, but the sensitivity, specificity, and NPV were over 90%, indicating the prevalence of fewer false-negative cases. Thus, NLP may be a helpful screening tool for case-finding purposes when using a large dataset.</p>
        <p>Although we did not formally record the time required for data abstraction, the abstractors estimate an average review time of 5 minutes per chart, which adds up to 110 hours of sustained attention to review a total of 1320 charts. On the other hand, once the rule sets have been developed, the NLP tool can extract the requested variables within seconds.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are several limitations of NLP that are worth discussing. First, the NLP approach can only extract information from the radiologist’s reported interpretation of diagnostic images, and it is not designed to be directly used for imaging interpretation [<xref ref-type="bibr" rid="ref4">4</xref>]. Although the tool was accurate in identifying which reports had missing data on ASPECTS and collateral status, information on these attributes was simply not obtainable without the direct assessment of the images. Second, each rule is applied at a sentence level so that the tool will not be able to capture attributes if keywords occur across different sentences. Third, the tool does not distinguish between homonyms in the English language. For instance, we experienced challenges with the word “ASPECT” used to describe the score and “aspect” used to describe a facet of the brain or a component of a blood vessel. Fourth, the NLP approach is influenced by variations in reporting practices to describe imaging findings. This was most apparent in the evaluation of the presence of cerebral ischemia. The terms used to describe this attribute were less predictable and frequently contained ambiguous language such as “possible subtle hypodensity” or “cannot rule out early ischemia.” Interestingly, the cerebral ischemia attribute also had a lower inter-rater reliability between the chart abstractors compared to the other attributes evaluated. We noticed that the nonclinical research assistant, who has extensive experience with chart abstraction for stroke research, was more liberal in recording ischemia, whereas the stroke specialist was more selective in recording ischemia depending on the language used by the radiologist. In this situation, the application of NLP rule sets may improve the standardization of data collection. Finally, the current proof-of-concept study has a small sample size. External validation of our methods with a larger sample of radiology reports is needed to address the limitations arising from variation in reporting practices.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>NLP approaches can identify the presence of large vessel occlusion with high accuracy and have the potential to improve the efficiency of large-scale data collection from imaging reports. External validation of our approach is needed.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>CHARTextract tool rules.</p>
        <media xlink:href="medinform_v9i5e24381_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 1217 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACA-A1</term>
          <def>
            <p>A1 segment of the anterior cerebral artery</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ASPECTS</term>
          <def>
            <p>Alberta Stroke Program Early CT Score</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CT</term>
          <def>
            <p>computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CTA</term>
          <def>
            <p>computed tomography angiography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CTP</term>
          <def>
            <p>computed tomography perfusion</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MCA-M1</term>
          <def>
            <p>M1 segment of the middle cerebral artery</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NPV</term>
          <def>
            <p>negative predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by the Sunnybrook Alternative Funding Plan Association through the Innovation Fund of the Alternative Funding Plan from the Academic Health Sciences Centres, Ontario, Canada. AY is supported by a national new investigator award from the Heart and Stroke Foundation of Canada. MKK is supported by a mid-career investigator award from the Heart and Stroke Foundation of Canada and holds the Lillian Love Chair in Women’s Health at the University Health Network.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Koot</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>O'Callaghan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bayley</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Corbett</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of individuals experiencing the effects of stroke in Canada: Trends and projections</article-title>
          <source>Stroke</source>
          <year>2015</year>
          <month>08</month>
          <volume>46</volume>
          <issue>8</issue>
          <fpage>2226</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1161/STROKEAHA.115.009616</pub-id>
          <pub-id pub-id-type="medline">26205371</pub-id>
          <pub-id pub-id-type="pii">STROKEAHA.115.009616</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Menon</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>van Zwam</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Dippel</surname>
              <given-names>DWJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Demchuk</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Dávalos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Majoie</surname>
              <given-names>CBLM</given-names>
            </name>
            <name name-style="western">
              <surname>van der Lugt</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>de Miquel</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Donnan</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Roos</surname>
              <given-names>YBWEM</given-names>
            </name>
            <name name-style="western">
              <surname>Bonafe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jahan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Diener</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>van den Berg</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>EI</given-names>
            </name>
            <name name-style="western">
              <surname>Berkhemer</surname>
              <given-names>OA</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Rempel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Millán</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Thornton</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Román</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Ribó</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beumer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Stouch</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>BCV</given-names>
            </name>
            <name name-style="western">
              <surname>van Oostenbrugge</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Saver</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Jovin</surname>
              <given-names>TG</given-names>
            </name>
            <collab>HERMES collaborators</collab>
          </person-group>
          <article-title>Endovascular thrombectomy after large-vessel ischaemic stroke: A meta-analysis of individual patient data from five randomised trials</article-title>
          <source>Lancet</source>
          <year>2016</year>
          <month>04</month>
          <day>23</day>
          <volume>387</volume>
          <issue>10029</issue>
          <fpage>1723</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(16)00163-X</pub-id>
          <pub-id pub-id-type="medline">26898852</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(16)00163-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Jadhav</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Haussen</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Bonafe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Budzik</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Bhuva</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yavagal</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Ribo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cognard</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hanel</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Sila</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Millan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>EI</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>English</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>QA</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Baxter</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Cardona</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Veznedaroglu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hellinger</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kirmani</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Lopes</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Jankowitz</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Frankel</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Costalat</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vora</surname>
              <given-names>NA</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Malik</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Furlan</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rubiera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aghaebrahim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Olivot</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tekle</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Shields</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Liebeskind</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Saver</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Jovin</surname>
              <given-names>TG</given-names>
            </name>
            <collab>DAWN Trial Investigators</collab>
          </person-group>
          <article-title>Thrombectomy 6 to 24 hours after stroke with a mismatch between deficit and infarct</article-title>
          <source>N Engl J Med</source>
          <year>2018</year>
          <month>01</month>
          <day>04</day>
          <volume>378</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1706442</pub-id>
          <pub-id pub-id-type="medline">29129157</pub-id>
          <pub-id pub-id-type="pii">10.1056/NEJMoa1706442</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Albers</surname>
              <given-names>GW</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Kemp</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Ortega-Gutierrez</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McTaggart</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Torbey</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Kim-Tenser</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Leslie-Mazwi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sarraj</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kasner</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Ansari</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Yeatts</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mlynash</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Heit</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zaharchuk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Carrozzella</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Palesch</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Demchuk</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Bammer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lavori</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Broderick</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Lansberg</surname>
              <given-names>MG</given-names>
            </name>
            <collab>DEFUSE 3 Investigators</collab>
          </person-group>
          <article-title>Thrombectomy for stroke at 6 to 16 hours with selection by perfusion imaging</article-title>
          <source>N Engl J Med</source>
          <year>2018</year>
          <month>02</month>
          <day>22</day>
          <volume>378</volume>
          <issue>8</issue>
          <fpage>708</fpage>
          <lpage>718</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29364767"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1713973</pub-id>
          <pub-id pub-id-type="medline">29364767</pub-id>
          <pub-id pub-id-type="pmcid">PMC6590673</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thomalla</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Simonsen</surname>
              <given-names>CZ</given-names>
            </name>
            <name name-style="western">
              <surname>Boutitie</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Berthezene</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cheripelli</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Fazekas</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Fiehler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Galinovic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Gellissen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Golsari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gregori</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Günther</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guibernau</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Häusler</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Hennerici</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kemmling</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marstrand</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Modrau</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Neeb</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Perez de la Ossa</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Puig</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ringleb</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Scheel</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schonewille</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Serena</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sunaert</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Villringer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wouters</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thijs</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ebinger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Endres</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fiebach</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Lemmens</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Muir</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Nighoghossian</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pedraza</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gerloff</surname>
              <given-names>C</given-names>
            </name>
            <collab>WAKE-UP Investigators</collab>
          </person-group>
          <article-title>MRI-guided thrombolysis for stroke with unknown time of onset</article-title>
          <source>N Engl J Med</source>
          <year>2018</year>
          <month>08</month>
          <day>16</day>
          <volume>379</volume>
          <issue>7</issue>
          <fpage>611</fpage>
          <lpage>622</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1804355</pub-id>
          <pub-id pub-id-type="medline">29766770</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ung</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Thrift</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Cadilhac</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Andrew</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Sundararajan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kapral</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Reeves</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kilkenny</surname>
              <given-names>MF</given-names>
            </name>
          </person-group>
          <article-title>Promising use of big data to increase the efficiency and comprehensiveness of stroke outcomes research</article-title>
          <source>Stroke</source>
          <year>2019</year>
          <month>05</month>
          <volume>50</volume>
          <issue>5</issue>
          <fpage>1302</fpage>
          <lpage>1309</lpage>
          <pub-id pub-id-type="doi">10.1161/STROKEAHA.118.020372</pub-id>
          <pub-id pub-id-type="medline">31009352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Holodinsky</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Zerna</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Svenson</surname>
              <given-names>LW</given-names>
            </name>
            <name name-style="western">
              <surname>Jetté</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Quan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>MD</given-names>
            </name>
          </person-group>
          <article-title>Use and utility of administrative health data for stroke research and surveillance</article-title>
          <source>Stroke</source>
          <year>2016</year>
          <month>07</month>
          <volume>47</volume>
          <issue>7</issue>
          <fpage>1946</fpage>
          <lpage>1952</lpage>
          <pub-id pub-id-type="doi">10.1161/strokeaha.116.012390</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Waqas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rai</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Vakharia</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chin</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Siddiqui</surname>
              <given-names>AH</given-names>
            </name>
          </person-group>
          <article-title>Effect of definition and methods on estimates of prevalence of large vessel occlusion in acute ischemic stroke: a systematic review and meta-analysis</article-title>
          <source>J Neurointerv Surg</source>
          <year>2020</year>
          <month>03</month>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>260</fpage>
          <lpage>265</lpage>
          <pub-id pub-id-type="doi">10.1136/neurintsurg-2019-015172</pub-id>
          <pub-id pub-id-type="medline">31444289</pub-id>
          <pub-id pub-id-type="pii">neurintsurg-2019-015172</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pons</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>LMM</given-names>
            </name>
            <name name-style="western">
              <surname>Hunink</surname>
              <given-names>MGM</given-names>
            </name>
            <name name-style="western">
              <surname>Kors</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing in radiology: A systematic review</article-title>
          <source>Radiology</source>
          <year>2016</year>
          <month>05</month>
          <volume>279</volume>
          <issue>2</issue>
          <fpage>329</fpage>
          <lpage>343</lpage>
          <pub-id pub-id-type="doi">10.1148/radiol.16142770</pub-id>
          <pub-id pub-id-type="medline">27089187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Obeid</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lenert</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing and machine learning algorithm to identify brain MRI reports with acute ischemic stroke</article-title>
          <source>PLoS One</source>
          <year>2019</year>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>e0212778</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0212778"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0212778</pub-id>
          <pub-id pub-id-type="medline">30818342</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-24904</pub-id>
          <pub-id pub-id-type="pmcid">PMC6394972</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Orfanoudaki</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Caprasse</surname>
              <given-names>FPM</given-names>
            </name>
            <name name-style="western">
              <surname>Hutch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fard</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Balogun</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Minnig</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Saglam</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Prescott</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Greer</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Smirnakis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bertsimas</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Machine learning and natural language processing methods to identify ischemic stroke, acuity and location from radiology reports</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>e0234908</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0234908"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0234908</pub-id>
          <pub-id pub-id-type="medline">32559211</pub-id>
          <pub-id pub-id-type="pii">PONE-D-19-31481</pub-id>
          <pub-id pub-id-type="pmcid">PMC7304623</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <source>CHARTextract - Li Ka Shing Centre for Healthcare Analytics Research &#38; Training (LKS-CHART)</source>
          <year>2019</year>
          <access-date>2019-08-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://lks-chart.github.io/CHARTextract-docs/">https://lks-chart.github.io/CHARTextract-docs/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lakomkin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dhamoon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>IP</given-names>
            </name>
            <name name-style="western">
              <surname>Tuhrim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fifi</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Mocco</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of large vessel occlusion in patients presenting with acute ischemic stroke: a 10-year systematic review of the literature</article-title>
          <source>J Neurointerv Surg</source>
          <year>2019</year>
          <month>03</month>
          <volume>11</volume>
          <issue>3</issue>
          <fpage>241</fpage>
          <lpage>245</lpage>
          <pub-id pub-id-type="doi">10.1136/neurintsurg-2018-014239</pub-id>
          <pub-id pub-id-type="medline">30415226</pub-id>
          <pub-id pub-id-type="pii">neurintsurg-2018-014239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barber</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Demchuk</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Buchan</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Validity and reliability of a quantitative computed tomography score in predicting outcome of hyperacute stroke before thrombolytic therapy. ASPECTS Study Group. Alberta Stroke Programme Early CT Score</article-title>
          <source>Lancet</source>
          <year>2000</year>
          <month>05</month>
          <day>13</day>
          <volume>355</volume>
          <issue>9216</issue>
          <fpage>1670</fpage>
          <lpage>1674</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(00)02237-6</pub-id>
          <pub-id pub-id-type="medline">10905241</pub-id>
          <pub-id pub-id-type="pii">S0140673600022376</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
