<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i3e13331</article-id>
      <article-id pub-id-type="pmid">31313661</article-id>
      <article-id pub-id-type="doi">10.2196/13331</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Improving the Efficacy of the Data Entry Process for Clinical Research With a Natural Language Processing–Driven Medical Information Extraction System: Quantitative Field Research</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Cui</surname>
            <given-names>Licong</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zheng</surname>
            <given-names>Jiaping</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="author" id="contrib1" equal-contrib="yes">
          <name name-style="western">
            <surname>Han</surname>
            <given-names>Jiang</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-4278-6617</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib2" equal-contrib="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Ken</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-2053-917X</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib3">
          <name name-style="western">
            <surname>Fang</surname>
            <given-names>Lei</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-1438-2500</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib4">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Shaodian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8514-9503</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib5">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Fei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-2212-3947</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib6">
          <name name-style="western">
            <surname>Ma</surname>
            <given-names>Handong</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3945-2990</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib7">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Liebin</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-2137-0177</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib8" corresp="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Shijian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Pediatric Translational Medicine Institute</institution>
            <institution>Shanghai Children’s Medical Center</institution>
            <institution>Shanghai Jiao Tong University School of Medicine</institution>
            <addr-line>1678 Dongfang Road, Pudong New Area</addr-line>
            <addr-line>Shanghai</addr-line>
            <country>China</country>
            <phone>86 21 38625637</phone>
            <fax>86 21 38625637</fax>
            <email>arrow64@163.com</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7050-463X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
      <label>1</label>
      <institution>Pediatric Translational Medicine Institute</institution>
      <institution>Shanghai Children’s Medical Center</institution>  
      <institution>Shanghai Jiao Tong University School of Medicine</institution>  
      <addr-line>Shanghai</addr-line>
      <country>China</country></aff>
      <aff id="aff2">
      <label>2</label>
      <institution>School of Public Health</institution>
      <institution>Shanghai Jiao Tong University School of Medicine</institution>  
      <addr-line>Shanghai</addr-line>
      <country>China</country></aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Synyi Research</institution>
        <addr-line>Shanghai</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
      <label>4</label>
      <institution>APEX Data and Knowledge Management Lab</institution>
      <institution>Shanghai Jiao Tong University</institution>  
      <addr-line>Shanghai</addr-line>
      <country>China</country></aff>
      <aff id="aff5">
      <label>5</label>
      <institution>Department of Healthcare Policy and Research</institution>
      <institution>Weill Cornell Medicine</institution>  
      <addr-line>New York, NY</addr-line>
      <country>United States</country></aff>
      <aff id="aff6">
      <label>6</label>
      <institution>Department of Computer Science</institution>
      <institution>Shanghai Jiao Tong University</institution>  
      <addr-line>Shanghai</addr-line>
      <country>China</country></aff>
      <aff id="aff7">
      <label>7</label>
      <institution>Child Health Advocacy Institute</institution>
      <institution>Shanghai Children’s Medical Center</institution>  
      <institution>Shanghai Jiao Tong University School of Medicine</institution>  
      <addr-line>Shanghai</addr-line>
      <country>China</country></aff>
      <author-notes>
        <corresp>Corresponding Author: Shijian Liu 
        <email>arrow64@163.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection"><season>Jul-Sep</season><year>2019</year></pub-date>
      <pub-date pub-type="epub">
        <day>16</day>
        <month>07</month>
        <year>2019</year>
      </pub-date>
      <volume>7</volume>
      <issue>3</issue>
      <elocation-id>e13331</elocation-id>
      <!--history from ojs - api-xml-->
      <history>
        <date date-type="received">
          <day>18</day>
          <month>1</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>28</day>
          <month>3</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>13</day>
          <month>5</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>29</day>
          <month>5</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Jiang Han, Ken Chen, Lei Fang, Shaodian Zhang, Fei Wang, Handong Ma, Liebin Zhao, Shijian Liu. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 16.07.2019.</copyright-statement>
      <copyright-year>2019</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2019/3/e13331/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The growing interest in observational trials using patient data from electronic medical records poses challenges to both efficiency and quality of clinical data collection and management. Even with the help of electronic data capture systems and electronic case report forms (eCRFs), the manual data entry process followed by chart review is still time consuming.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>To facilitate the data entry process, we developed a natural language processing–driven medical information extraction system (NLP-MIES) based on the i2b2 reference standard. We aimed to evaluate whether the NLP-MIES–based eCRF application could improve the accuracy and efficiency of the data entry process.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We conducted a randomized and controlled field experiment, and 24 eligible participants were recruited (12 for the manual group and 12 for the NLP-MIES–supported group). We simulated the real-world eCRF completion process using our system and compared the performance of data entry on two research topics, pediatric congenital heart disease and pneumonia.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>For the congenital heart disease condition, the NLP-MIES–supported group increased accuracy by 15% (95% CI 4%-120%, <italic>P</italic>=.03) and reduced elapsed time by 33% (95% CI 22%-42%, <italic>P</italic>&lt;.001) compared with the manual group. For the pneumonia condition, the NLP-MIES–supported group increased accuracy by 18% (95% CI 6%-32%, <italic>P</italic>=.008) and reduced elapsed time by 31% (95% CI 19%-41%, <italic>P</italic>&lt;.001).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our system could improve both the accuracy and efficiency of the data entry process.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>electronic data capture</kwd>
        <kwd>electronic medical records</kwd>
        <kwd>case report form</kwd>
        <kwd>natural language processing</kwd>
        <kwd>field research</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>According to ClinicalTrials.gov [<xref ref-type="bibr" rid="ref1">1</xref>], the number of clinical trials worldwide has increased exponentially in recent years. Clinicians and researchers use evidence from interventional and observational trials to determine the effectiveness of treatments or interventions. Interventional trials, such as randomized controlled trials, compare the efficacy of interventions under relatively ideal cohorts to get unbiased estimates of effects. However, reality is far more complicated, and these ideal cohorts limit generalizability of results obtained to broader patient populations and settings. Moreover, due to high expenses and the short research cycle, interventional trials could hardly provide evaluations of effectiveness and safety for large populations and long-term follow-ups. As supplements, many observational trials, such as retrospective cohort studies, cross-sectional studies, and real-world evidence studies, use patient historical data collected at the point of care to compare effectiveness and safety of treatments in clinical practice settings in nonexperimental ways. Such observational trials usually have larger cohort sizes and longer follow-up periods. Growing interest in using these approaches poses new challenges to effective and efficient collection of patient electronic medical records (EMRs).</p>
      <p>Manual data entry based on paper-and-pen case report forms (CRFs) followed by chart review is the conventional way of clinical trial data collection. With the development of health care information technology, electronic data capture (EDC) systems, which accelerate the data collection process and assure data quality with real-time data entry, review, analysis, and verification [<xref ref-type="bibr" rid="ref2">2</xref>], emerge as a timely solution that is in high demand. Driven by the prevalent use of EDC systems, CRFs gradually transitioned from paper to electronic forms [<xref ref-type="bibr" rid="ref3">3</xref>]. Many studies have suggested that data entry using electronic CRF (eCRF) applications of EDC systems could achieve higher efficiency and accuracy at a lower cost than the conventional paper-and-pen approach [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. However, neither EDC nor eCRF fundamentally changed the essential ways of how the data are collected. Especially for observational trials using patient data, researchers still need to manually transcribe the data one by one from EMRs. The data entry process takes time and becomes a significant efficiency bottleneck.</p>
      <p>The 2018 guidance from the US Food and Drug Administration [<xref ref-type="bibr" rid="ref9">9</xref>] emphasized the importance of interoperability between electronic health records (EHRs) and EDCs. It also promoted the idea of secondary use of source data at the time of care to prepopulate eCRFs without specific user efforts. The guidance focused more on the use of structured data, such as demographics, vital signs, and laboratory data, but little on the use of unstructured clinical narratives, which account for about 80% of the patient care information [<xref ref-type="bibr" rid="ref10">10</xref>]. To achieve data interoperability for these unstructured narratives, many EDC systems created predesigned patient information templates including standardized documentation or forms for coded data entry in lieu of free text documentation to structuralize the medical records [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Clinicians record patient information under the guidance of these templates, and at the same time the system stores the coded data from templates for future analysis. Patient information templates can help data collection for research and patient care, integrate EDC and EMRs, and automatically prepopulate the eCRF. However, limitations of the templates were obvious. For clinicians, the one-size-fits-all templates restricted freedom of expression. For researchers, the predesigned data elements limited usability of the data in different research topics.</p>
      <p>The development of natural language processing (NLP) technologies provides new potential for better secondary use of free unstructured EMR data. Informatics for integrating biology and the bedside (i2b2) has posed NLP challenges to extract information, including clinical finding, test, treatment, medication, clinical event, and time information, from clinical notes and discharge summaries [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>] and promoted a series of commercial medical applications focusing on post hoc structuralization of medical records [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. Nonetheless, as one of the main topics on secondary use of patient EMR, unstructured data collection based on NLP technology has not been well studied.</p>
      <p>In order to fill in this gap, we developed an NLP-driven medical information extraction system (NLP-MIES) based on i2b2 reference standards for concept extraction, assertion, and relation classification. After manually constructing eCRFs and binding data elements using concepts from the Systematized Nomenclature of Medicine–Clinical Terms (SNOMED-CT) or the radiology-specific ontology (RadLex) developed by the Radiological Society of North America, our system can scan clinical notes and image diagnostic reports, find related medical concepts, and automatically prepopulate data elements with associated values. To further compare the accuracy and efficiency between manual data entry and NLP technology–supported data entry, we conducted a randomized and controlled field experiment. We created a mock-up eCRF application that enables users to review medical records and enter, modify, and verify the data prepopulated by NLP-MIES. We recruited clinicians and researchers to use the application to finish a certain number of simply designed eCRFs within a limited time. Based on these designs, we simulated a real-world eCRF filling process and aimed to quantitatively evaluate how NLP technologies could improve efficacy of data collection of clinical research and identify potential problems that should not be neglected in future NLP-driven EDC design.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Natural Language Processing–Driven Medical Information Extraction System</title>
        <p>We leveraged the methods developed for the 2010 i2b2/Veterans Affairs (VA) challenge as the primary reference for Chinese medical NLP machine learning practices in NLP-MIES, which includes Chinese word segmentation, named entity recognition, assertion classification, and relation extraction [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. On the basis of the predefined entities (medical problems, tests, treatments) and relation types (medical problems and treatments, medical problems and tests, medical problems and other medical problems) from the 2010 i2b2/VA challenge, in order to extract more information from medical records, we added four new entities (body structure, observable, qualifier, value) and four new types of relations (body structures and observables, medical problems and observables, observables and qualifiers, observables and values). After preprocessing by an associated value dimension algorithm [<xref ref-type="bibr" rid="ref23">23</xref>], entities from medical texts can be rearranged according to their relations. We then adopted an improved longest common subsequence algorithm to map these aligned entities and relations into Chinese SNOMED-CT and RadLex concepts and synonyms [<xref ref-type="bibr" rid="ref24">24</xref>]. <xref ref-type="fig" rid="figure1">Figure 1</xref> shows the overall workflow of NLP-MIES.</p>
      </sec>
      <sec>
        <title>Electronic Case Report Form</title>
        <p>We constructed simple eCRFs for two disease conditions (pediatric congenital heart disease and pneumonia) to evaluate the efficacy of NLP-MIES. To make the eCRFs closer to the real ones, we invited clinical researchers from the departments of pediatric cardiothoracic surgery and pediatric respiratory medicine to help design the eCRFs. The types of CRF data elements include true-false (participant judges whether a certain condition or medical problem exists, doesn’t exist, or is not mentioned in a certain case and chooses the button accordingly—for example, patient had a disturbance of consciousness: true, false, or not mentioned); multiple choice (participant should click the button corresponding to one or more conditions or medical problems associated with a certain patient—for example, which of the following are the chief complaints of the patient: cardiac murmur, cyanosis, or dyspnea); and fill-in-the-blank (participant should enter the value for each data element—for example, the lesion size of ventricular septal defect is ___ cm). <xref ref-type="fig" rid="figure2">Figures 2</xref> and <xref ref-type="fig" rid="figure3">3</xref> show examples of eCRF design.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Workflow of the natural language processing–driven medical information extraction system. EMR: electronic medical record; NLP: natural language processing; eCRF: electronic case report form.</p>
          </caption>
          <graphic xlink:href="medinform_v7i3e13331_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Electronic case report form design for congenital heart disease.</p>
          </caption>
          <graphic xlink:href="medinform_v7i3e13331_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Electronic case report form design for pneumonia.</p>
          </caption>
          <graphic xlink:href="medinform_v7i3e13331_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>We further divided the data element true-false into two parts based on where the elements should be retrieved from: admission records (true-false I) or imaging reports (true-false II). All data elements were bound with SNOMED-CT or RadLex concepts and relations, such as disturbance of consciousness (concept, medical problem, SNOMED-CT ID: 3006004), cardiac murmur (concept, medical problem, SNOMED-CT ID: 42842009), lesion size (concept, observable, SNOMED-CT ID: 246116008) of (relation, medical problems and observables) ventricular septal defect (concept, medical problem, RadLex ID: RID3277).</p>
      </sec>
      <sec>
        <title>Medical Text From the Electronic Medical Record System</title>
        <p>For the congenital heart disease condition, we included admission records and ultrasonic cardiogram reports from pediatric patients aged 2 hours to 14 years with congenital heart disease (including atrial septal defect, ventricular septal defect, patent ductus arteriosus, patent foramen ovale, etc) attending the department of cardiothoracic surgery of Shanghai Children’s Medical Center from July 1, 2016, to July 1, 2017.</p>
        <p>For the pneumonia condition, we included admission records and chest x-ray reports from pediatric patients aged 6 months to 14 years with pneumonia (including bronchopneumonia, viral pneumonia, bacterial pneumonia, mycoplasma pneumonia, lobar pneumonia, lobular pneumonia, etc) attending the department of respiratory medicine of Shanghai Children’s Medical Center from July 1, 2016, to July 1, 2017.</p>
        <p>All medical texts were from the EMR system of Shanghai Children’s Medical Center and were de-identified. We randomly selected 60 patient cases for each condition. A total of 120 cases and 240 medical texts were included.</p>
      </sec>
      <sec>
        <title>System Functions and Human-Computer Interaction</title>
        <p>We developed a graphical user interface for easy browsing of imported patient medical texts as shown in <xref ref-type="fig" rid="figure4">Figure 4</xref>. Users can see imported admission records, imaging reports, and eCRFs on the screen. When NLP-MIES was enabled, our system automatically scanned the texts, found medical concepts mentioned in raw texts, identified assertion or value information, and prepopulated the data elements accordingly. Our system recorded the raw text location where each medical concept was extracted. When necessary, users could directly click the “back to” button to highlight the location for further data verification. Each eCRF was divided into three or four parts according to the types of data elements (<xref ref-type="fig" rid="figure2">Figures 2</xref> and <xref ref-type="fig" rid="figure3">3</xref>). During the experiment, the elapsed time for finishing each part was automatically recorded by the system.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Graphical user interface for electronic case report form (eCRF) data entry.</p>
          </caption>
          <graphic xlink:href="medinform_v7i3e13331_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Gold Standard</title>
        <p>The ground truth results of eCRFs for all 120 cases were provided by three clinical researchers involved in the eCRF design. We used a two-step strategy to create our gold standard. First, two invited researchers independently extracted data from medical texts and populated eCRFs using an eCRF application but without the support of NLP-MIES. Our system automatically recorded the populated values and elapsed time for each data entry. Second, for pairs in which the two researchers did not have complete agreement, a third researcher resolved inconsistent data extraction between the two researchers.</p>
      </sec>
      <sec>
        <title>Study Design</title>
        <p>We conducted a randomized and controlled field experiment at Shanghai Children’s Medical Center to evaluate whether the NLP-MIES group was more effective and efficient than the manual group in the data entry process of eCRF. Participants holding medical degrees, having clinical research experience, or working as clinicians were eligible for inclusion and recruited in this study. The study was approved by the Human Research Ethics Committees of Shanghai Children’s Medical Center. Written informed consent was obtained from all participants prior to randomization.</p>
        <p>We randomly allocated the volunteers to two groups by using a completely randomized digital table:</p>
        <list list-type="bullet">
          <list-item>
            <p>Manual group: participants should check the data elements in the eCRF, find related information in the medical text, and click or enter values accordingly.</p>
          </list-item>
          <list-item>
            <p>NLP-MIES–supported group: NLP-MIES prepopulated the data elements in the eCRF. Participants should check the data elements, find related information in the medical text, and verify or correct values accordingly.</p>
          </list-item>
        </list>
        <p>Before the experiment, all participants were authorized and trained to use the system and eCRF-based data entry. We chose a relatively quiet place for the experiment to reduce the potential effect of other environmental factors. Each participant was provided with a laptop and asked to complete all cases from 2:00 pm to 5:00 pm. Participants failing to complete the eCRFs in that time frame were excluded from the data analysis. The order of the 120 cases was randomly shuffled for each participant.</p>
      </sec>
      <sec>
        <title>Outcomes and Statistical Analysis</title>
        <p>We calculated average accuracy and elapsed time for each participant to finish all assigned eCRFs and compared the differences between the manual and NLP-MIES–supported groups. To further analyze data entry errors made by participants under the support of NLP-MIES, we performed a post hoc error analysis for the results provided by the NLP-MIES–supported group. We calculated the percentages of two types of data entry errors: error with modification and error without modification. We defined an error with modification as a data entry error made when a participant incorrectly modified a prepopulated result and an error without modification as a data entry error made when a participant kept an incorrect prepopulated result.</p>
        <p>Educational and psychological studies have indicated that the distributions of the measurements of how many points participants could get in a certain test and how much time it would take a participant to respond to a certain stimulus (reaction time) were right-skewed [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. Thus, we expected the data for each participant’s average accuracy and elapsed time for finishing eCRFs would not be normally distributed and described them using their median and interquartile range. To evaluate the differences between groups, we made a logarithmic transformation of the data and performed independent group <italic>t</italic> tests with SAS 9.2 (SAS Institute) software. <italic>P</italic> value, logarithmic mean difference (MD), ratio of change in geometric mean (exponential of logarithmic mean difference), and corresponding 95% confidence interval were calculated [<xref ref-type="bibr" rid="ref28">28</xref>]. We considered two-sided <italic>P</italic> values &lt;.05 as statistically significant.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Participant Characteristics</title>
        <p>We recruited a total of 24 eligible participants, 12 for the manual group and 12 for the NLP-MIES–supported group. All the participants successfully completed the eCRFs within the required time. The mean age of participants was 24.66 (SD 2.30) years (manual group 24.70 [SD 2.47] years, NLP-MIES group 24.48 [SD 2.36] years; <italic>P</italic>=.73); 33% (8/24) of participants were men and 67% (16/24) were women. There were no significant differences between the characteristics of the participants in the two groups.</p>
        <p>The overall interoperator consistency rate was 96.85% (1627/1680) for the congenital heart disease condition and 94.82% (1081/1140) for the pneumonia condition (<xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Accuracy</title>
        <p>The overall average accuracy for the congenital heart disease and pneumonia eCRFs was significantly higher in the NLP-MIES–supported group than the manual group (congenital heart disease, <italic>P</italic>=.03; pneumonia, <italic>P</italic>=.008; <xref ref-type="table" rid="table1">Table 1</xref>). For the congenital heart disease eCRFs, the logarithmic MD of average accuracy between groups was 0.14 (95% CI 0.03-0.25), corresponding to an increase of 15% (95% CI 4%-120%) in geometric mean. Similarly, for the pneumonia eCRFs, the logarithmic MD was 0.17 (95% CI 0.06-0.28), corresponding to an increase of 18% (95% CI 6%-32%) in geometric mean. Comparing by types of data elements, the average accuracy was significantly higher in the NLP-MIES–supported group for all types except true-false II and fill-in-the-blank on the congenital heart disease eCRFs. The average accuracy of NLP-MIES prepopulation was slightly higher than median average accuracy of the manual group but lower than that of the NLP-MIES–supported group for most data element types.</p>
      </sec>
      <sec>
        <title>Elapsed Time</title>
        <p>The overall average time elapsed for congenital heart disease and pneumonia eCRFs was significantly lower in the NLP-MIES–supported group than the manual group (congenital heart disease, <italic>P</italic>&lt;.001; pneumonia, <italic>P</italic>&lt;.001; <xref ref-type="table" rid="table2">Table 2</xref>). For the congenital heart disease eCRFs, the logarithmic MD of average time elapsed was –0.40 (95% CI –0.55 to –0.25), corresponding to a reduction of 33% (95% CI 22% to 42%) in geometric mean. For the pneumonia eCRFs, the logarithmic MD was –0.37 (95% CI –0.53 to –0.21), corresponding to a reduction of 31% (95% CI 19% to 41%) in geometric mean. Comparing by types of data elements, the average elapsed time was significantly lower in the NLP-MIES–supported group for all types.</p>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>Post hoc error analysis showed that errors without modification held the majority of error cases in all types of data elements (<xref ref-type="table" rid="table3">Table 3</xref>), and the overall percentage of errors without modification was almost 2.5 times higher than the percentage of errors with modification.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Average accuracy for electronic case report form data entry.</p>
          </caption>
          <table width="1000" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="150"/>
            <col width="100"/>
            <col width="170"/>
            <col width="150"/>
            <col width="150"/>
            <col width="180"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Type of disease and data element</td>
                <td>NLP<sup>a</sup> only</td>
                <td>Manual group (median, IQR<sup>b</sup>)</td>
                <td>NLP-MIES<sup>c</sup> group (median, IQR)</td>
                <td>Logarithmic mean difference (95% CI)</td>
                <td>Ratio of change in geometric mean (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3"><bold>Congenital heart disease</bold></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false I<sup>d</sup></td>
                <td>97.50</td>
                <td>79.17 (66.74, 84.17)</td>
                <td>96.81 (95.69, 97.29)</td>
                <td>0.41 (0.04 to 0.79)</td>
                <td>1.51 (1.03 to 2.20)</td>
                <td>.04</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false II<sup>e</sup></td>
                <td>92.00</td>
                <td>95.39 (92.67, 95.89)</td>
                <td>97.78 (97.19, 98.44)</td>
                <td>0.10 (–0.01 to 0.21)</td>
                <td>1.10 (0.99 to 1.24)</td>
                <td>.10</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Multiple choice</td>
                <td>89.33</td>
                <td>82.80 (73.13, 85.83)</td>
                <td>95.00 (94.58, 97.42)</td>
                <td>0.29 (0.10 to 0.49)</td>
                <td>1.34 (1.10 to 1.63)</td>
                <td>.009</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Fill-in-the-blank</td>
                <td>94.17</td>
                <td>96.33 (95.25, 97.00)</td>
                <td>97.00 (95.83, 97.42)</td>
                <td>0.01 (–0.01 to 0.02)</td>
                <td>1.01 (0.99 to 1.02)</td>
                <td>.22</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Overall</td>
                <td>92.77</td>
                <td>90.42 (87.75, 92.68)</td>
                <td>97.17 (96.83, 97.44)</td>
                <td>0.14 (0.03 to 0.25)</td>
                <td>1.15 (1.04 to 1.28)</td>
                <td>.03</td>
              </tr>
              <tr valign="top">
                <td colspan="2"><bold>Pneumonia</bold></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false I</td>
                <td>88.00</td>
                <td>70.83 (65.25, 77.75)</td>
                <td>88.17 (87.25, 89.00)</td>
                <td>0.30 (0.11 to 0.50)</td>
                <td>1.35 (1.11 to 1.65)</td>
                <td>.009<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false II</td>
                <td>94.44</td>
                <td>91.25 (88.26, 93.78)</td>
                <td>95.83 (95.21, 96.81)</td>
                <td>0.11 (0.01 to 0.21)</td>
                <td>1.12 (1.01 to 1.23)</td>
                <td>.04</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Multiple choice</td>
                <td>80.83</td>
                <td>67.50 (50.21, 72.50)</td>
                <td>81.25 (77.92, 85.00)</td>
                <td>0.33 (0.14 to 0.52)</td>
                <td>1.39 (1.15 to 1.68)</td>
                <td>.003<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Overall</td>
                <td>84.15</td>
                <td>84.21 (80.53, 86.23)</td>
                <td>92.19 (91.49, 93.20)</td>
                <td>0.17 (0.06 to 0.28)</td>
                <td>1.18 (1.06 to 1.32)</td>
                <td>.008</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>IQR: interquartile range.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>NLP-MIES: NLP-driven medical information extraction system.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>True-false I: data elements retrieved from admissions records.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>True-false II: data elements retrieved from imaging reports (ultrasonic cardiogram or chest x-ray).</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>Independent group <italic>t</italic> test.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Average elapsed time for electronic case report form data entry.</p>
          </caption>
          <table width="1000" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="200"/>
            <col width="180"/>
            <col width="180"/>
            <col width="160"/>
            <col width="180"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Type of disease and data element</td>
                <td>Manual group seconds (median, IQR<sup>a</sup>)</td>
                <td>NLP-MIES<sup>b</sup> group seconds (median, IQR)</td>
                <td>Logarithmic mean difference (95% CI)</td>
                <td>Ratio of change in geometric mean (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2"><bold>Congenital heart disease</bold></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false I<sup>c</sup></td>
                <td>26.43 (21.43, 30.24)</td>
                <td>13.84 (11.83, 16.06)</td>
                <td>–0.71 (–1.02 to –0.39)</td>
                <td>0.49 (0.36 to 0.68)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false II<sup>d</sup></td>
                <td>49.48 (43.08, 51.44)</td>
                <td>35.47 (31.34, 38.63)</td>
                <td>–0.29 (–0.46 to –0.11)</td>
                <td>0.75 (0.63 to 0.89)</td>
                <td>.003</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Multiple choice</td>
                <td>9.70 (10.61, 12.29)</td>
                <td>7.34 (7.47, 8.55)</td>
                <td>–0.36 (–0.53 to –0.19)</td>
                <td>0.70 (0.59 to 0.82)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Fill-in-the-blank</td>
                <td>18.41 (17.35, 19.60)</td>
                <td>12.38 (11.38, 14.70)</td>
                <td>–0.34 (–0.50 to –0.17)</td>
                <td>0.71 (0.60 to 0.84)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Overall</td>
                <td>103.79 (94.59, 109.39)</td>
                <td>69.73 (60.91, 79.66)</td>
                <td>–0.40 (–0.55 to –0.25)</td>
                <td>0.67 (0.58 to 0.78)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2"><bold>Pneumonia</bold></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false I</td>
                <td>28.71 (25.61, 32.61)</td>
                <td>15.82 (14.36, 16.88)</td>
                <td>–0.64 (–0.97 to –0.30)</td>
                <td>0.53 (0.38 to 0.74)</td>
                <td>.001</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false II</td>
                <td>31.59 (28.29, 32.49)</td>
                <td>25.22 (22.07, 28.80)</td>
                <td>–0.19 (–0.35 to –0.03)</td>
                <td>0.83 (0.71 to 0.97)</td>
                <td>.02</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Multiple choice</td>
                <td>11.02 (10.65, 12.05)</td>
                <td>8.61 (8.05, 9.25)</td>
                <td>–0.33 (–0.51 to –0.15)</td>
                <td>0.72 (0.60 to 0.86)</td>
                <td>.001</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Overall</td>
                <td>73.28 (65.80, 74.47)</td>
                <td>49.42 (44.33, 53.88)</td>
                <td>–0.37 (–0.53 to –0.21)</td>
                <td>0.69 (0.59 to 0.81)</td>
                <td>&lt;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>IQR: interquartile range.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>NLP-MIES: NLP-driven medical information extraction system.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>True-false I: data elements retrieved from admissions records.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>True-false II: data elements retrieved from imaging reports (ultrasonic cardiogram or chest x-ray).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Error analysis for natural language processing–driven medical information extraction system–supported data entry.</p>
          </caption>
          <table width="1000" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td>Types</td>
                <td colspan="4">Errors, n (%)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>True-false (n=1167)</td>
                <td>Multiple choice (n=439)</td>
                <td>Fill-in-the-blank (n=121)</td>
                <td>Total (N=1727)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Errors with modification</td>
                <td>325 (27.85)</td>
                <td>158 (36.00)</td>
                <td>16 (13.22)</td>
                <td>499 (28.89)</td>
              </tr>
              <tr valign="top">
                <td>Errors without modification</td>
                <td>842 (72.15)</td>
                <td>281 (64.01)</td>
                <td>105 (86.78)</td>
                <td>1228 (71.11)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this field experiment, we created a mock-up eCRF application with NLP-supported data entry and simulated a real-world eCRF completion process. Results showed a consistent trend across all eCRF topics and data element types indicating NLP-MIES could significantly improve the accuracy and efficiency of data entry. In quantitative evaluation, data entry under the support of NLP-MIES could increase accuracy by approximately 15% to 18% (the relative change in geometric mean is similar to the change in arithmetic mean [<xref ref-type="bibr" rid="ref29">29</xref>]) and reduce elapsed time by one-third.</p>
        <p>Many potential factors could contribute to the increased accuracy and efficiency of NLP-MIES–aided data entry. First, we considered NLP-MIES–aided data entry as in essence a process of double-checking—an NLP-MIES check followed by a manual check. In clinical practice, double-checking is a widely used and trusted approach that could significantly reduce medical errors [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Second, we tried several ways to establish participant trust in NLP-MIES: ensuring NLP-MIES entry accuracy (not worse or even better than manual entry), providing better interpretability (one-click back to raw text), and simplifying system interaction [<xref ref-type="bibr" rid="ref28">28</xref>]. Third, the overall time elapsed for the manual group was about 50% more than the NLP-MIES–supported group. In our study, higher accuracy was achieved for congenital heart disease cases than pneumonia cases; it may be that extracted information on pneumonia cases was more complicated than that of congenital heart disease cases.</p>
        <p>In our post hoc error analysis, we considered errors with modification as cognitive errors. Participants made cognitive errors because they failed to find correct answers (due to limitation of knowledge or lack of training) even though they noticed prepopulated answers were wrong. We considered most errors without modification as commission errors. Participants made commission errors because they followed the prepopulated answers that were incorrect. The result of error analysis indicated that commission errors dominated the data entry quality under the support of NLP-MIES. Overreliance could be a key factor for commission errors and as a side effect of participant trust in NLP-MIES [<xref ref-type="bibr" rid="ref29">29</xref>]. One possible solution to this problem could be to use NLP-MIES as an independent investigator. In real-world clinical research data management, at least two investigators independently enter data for each case to reduce commission errors and then submit the entries to the clinical research associates (CRAs). The CRAs review and verify the entries to ensure data completeness and quality [<xref ref-type="bibr" rid="ref30">30</xref>]. In our scenario, the NLP system could act as an independent investigator and provide data entry directly to CRAs rather than prepopulate data for other investigators, and CRAs could make final decisions based on both NLP-MIES–supported and manual entries.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>As far as we know, this is the first study, especially in Chinese language settings, that quantitatively evaluated how NLP technologies could improve the efficiency and efficacy of data collection of clinical research. We believe NLP technologies would be a vital link in the great chain of data exchange between EHRs and EDC. It can potentially extract and transform data from medical text in real time and pose fewer restrictions on clinician freedom of expression and workflows. In addition, our mock-up NLP-driven eCRF application provided a graphical user interface for easy browsing and validation of source text data and data entries to ensure data quality. We believe that the results of our study can provide guidance for future research and development of NLP-driven EDC systems as well as the integration of EDC and EMR systems.</p>
        <p>Although the results of our field experiment demonstrated beneficial outcomes for NLP-MIES–supported data entry, there were limitations. First, we did not evaluate the efficacy of NLP-MIES under different levels of prepopulation accuracy. Early research has indicated that improving accuracy of the automation system itself may not necessarily improve the performance of human-computer collaboration [<xref ref-type="bibr" rid="ref31">31</xref>]. Moreover, some studies suggest that automation systems with low accuracy can affect human-computer collaboration and trust [<xref ref-type="bibr" rid="ref32">32</xref>]. Second, there might be significant differences between our eCRFs and real-world CRFs in contents and types of data elements. Thus, it is inappropriate to extrapolate our quantitative results to real-world settings. Third, since NLP-MIES was designed for Chinese medical records and tested in Chinese eCRFs only, the efficiency of this methodology based on the i2b2 reference standard needs further evaluation in other languages.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we developed an NLP-driven medical information extraction system based on i2b2 reference standards to facilitate the data entry process of eCRFs for clinical research. We conducted a randomized and controlled field experiment to simulate a real-world data entry process and evaluated the efficacy of our system. The results of our study showed NLP-MIES could significantly improve the accuracy and efficiency of data entry.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>Interoperator agreement and elapsed time for each electronic case report form topic.</p>
        <media xlink:href="medinform_v7i3e13331_app1.pdf" xlink:title="PDF File (Adobe PDF File), 36KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CRA</term>
          <def>
            <p>clinical research associate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CRF</term>
          <def>
            <p>case report form</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">eCRF</term>
          <def>
            <p>electronic case report form</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EDC</term>
          <def>
            <p>electronic data capture</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">i2b2</term>
          <def>
            <p>informatics for integrating biology and the bedside</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">IQR</term>
          <def>
            <p>interquartile range</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">MD</term>
          <def>
            <p>mean difference</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NLP-MIES</term>
          <def>
            <p>natural language processing–driven medical information extraction system</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SNOMED-CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine–Clinical Terms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">VA</term>
          <def>
            <p>Veterans Affairs</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to thank Gen Gu (software development engineer—natural language processing, Synyi Research, Shanghai, China), Junjie Cai (software development engineer—machine learning, Synyi Research), and Xiaopeng Jia (software development engineer—backend, Synyi Research) for their help and advice during the development of NLP-MIES and the eCRF application. This work was supported by the Shanghai Collaborative Innovation Center for Translational Medicine (TM201720), National Science Foundation of China (81872637, 81728017, 81602868), Shanghai Municipal Commission of Health and Family Planning (201840324, 20164Y0095), National Science and Technology Commission for the Association of Diabetes and Nutrition in Adolescents (2016YFC1305203), Shanghai Children’s Health Service Capacity Construction (GDEK201708), National Human Genetic Resources Sharing Service Platform (2005DKA21300), Science and Technology Development Program of Pudong Shanghai New District (PKJ2017-Y01), Medical and Engineering Cooperation Project of Shanghai Jiao Tong University (YG2017ZD15), Shanghai Professional and Technical Services Platform (18DZ2294100), and the 2019 Science and Technology Innovation–Biomedical Supporting Program of the Shanghai Science and Technology Committee (19441904400).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>JH and KC drafted the manuscript and contributed equally to this work. SL, KC, and SZ designed the study. JH and LF collected the data. SL and LZ obtained the funding. LF and KC were involved in data cleaning and verification, and KC analyzed the data. SL, KC, LZ, and FW contributed to the interpretation of the results and critical revision of the manuscript for important intellectual content. SL had the primary responsibility for the final content. All authors have read and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <source>ClinicalTrials.gov</source>
          <access-date>2019-06-12</access-date>
          <comment>Trends, charts, and maps 
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://clinicaltrials.gov/ct2/resources/trends">https://clinicaltrials.gov/ct2/resources/trends</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="75C1n01tO"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walther</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hossin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Townend</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Abernethy</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parker</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jeffries</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Comparison of electronic data capture (EDC) with the standard data capture method for clinical trial data</article-title>
          <source>PLoS One</source>
          <year>2011</year>
          <volume>6</volume>
          <issue>9</issue>
          <fpage>e25348</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0025348"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0025348</pub-id>
          <pub-id pub-id-type="medline">21966505</pub-id>
          <pub-id pub-id-type="pii">PONE-D-11-05243</pub-id>
          <pub-id pub-id-type="pmcid">PMC3179496</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bellary</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Krishnankutty</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Latha</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Basics of case report form designing in clinical research</article-title>
          <source>Perspect Clin Res</source>
          <year>2014</year>
          <month>10</month>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>159</fpage>
          <lpage>166</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.picronline.org/article.asp?issn=2229-3485;year=2014;volume=5;issue=4;spage=159;epage=166;aulast=Bellary"/>
          </comment>
          <pub-id pub-id-type="doi">10.4103/2229-3485.140555</pub-id>
          <pub-id pub-id-type="medline">25276625</pub-id>
          <pub-id pub-id-type="pii">PCR-5-159</pub-id>
          <pub-id pub-id-type="pmcid">PMC4170533</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fleischmann</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Decker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kraft</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mai</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mobile electronic versus paper case report forms in clinical trials: a randomized controlled trial</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2017</year>
          <month>12</month>
          <day>01</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>153</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-017-0429-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-017-0429-y</pub-id>
          <pub-id pub-id-type="medline">29191176</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-017-0429-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC5709849</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dillon</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Pirie</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rice</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pomilla</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sandhu</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Motala</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>EH</given-names>
            </name>
            <collab>African Partnership for Chronic Disease Research (APCDR)</collab>
          </person-group>
          <article-title>Open-source electronic data capture system offered increased accuracy and cost-effectiveness compared with paper methods in Africa</article-title>
          <source>J Clin Epidemiol</source>
          <year>2014</year>
          <month>12</month>
          <volume>67</volume>
          <issue>12</issue>
          <fpage>1358</fpage>
          <lpage>1363</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0895-4356(14)00238-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2014.06.012</pub-id>
          <pub-id pub-id-type="medline">25135245</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(14)00238-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC4271740</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ene-Iordache</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Carminati</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Antiga</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rubis</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ruggenenti</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Remuzzi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Remuzzi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Developing regulatory-compliant electronic case report forms for clinical trials: experience with the DEMAND trial</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2009</year>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>404</fpage>
          <lpage>408</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19261946"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M2787</pub-id>
          <pub-id pub-id-type="medline">19261946</pub-id>
          <pub-id pub-id-type="pii">M2787</pub-id>
          <pub-id pub-id-type="pmcid">PMC2732224</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le Jeannic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Quelen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alberti</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Durand-Zaleski</surname>
              <given-names>I</given-names>
            </name>
            <collab>CompaRec Investigators</collab>
          </person-group>
          <article-title>Comparison of two data collection processes in clinical studies: electronic and paper case report forms</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2014</year>
          <month>01</month>
          <day>17</day>
          <volume>14</volume>
          <fpage>7</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/1471-2288-14-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2288-14-7</pub-id>
          <pub-id pub-id-type="medline">24438227</pub-id>
          <pub-id pub-id-type="pii">1471-2288-14-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC3909932</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thriemer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ley</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ame</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Puri</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Hashim</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>NY</given-names>
            </name>
            <name name-style="western">
              <surname>Salim</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Ochiai</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Wierzba</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Clemens</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Deen</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Replacing paper data collection forms with electronic data entry in the field: findings from a study of community-acquired bloodstream infections in Pemba, Zanzibar</article-title>
          <source>BMC Res Notes</source>
          <year>2012</year>
          <volume>5</volume>
          <fpage>113</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.biomedcentral.com/1756-0500/5/113"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1756-0500-5-113</pub-id>
          <pub-id pub-id-type="medline">22353420</pub-id>
          <pub-id pub-id-type="pii">1756-0500-5-113</pub-id>
          <pub-id pub-id-type="pmcid">PMC3392743</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <source>Food and Drug Administration</source>
          <access-date>2019-06-12</access-date>
          <comment>Use of electronic health record data in clinical investigations: guidance for industry 
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/downloads/Drugs/GuidanceComplianceRegulatoryInformation/Guidances/UCM501068.pdf">https://www.fda.gov/downloads/Drugs/GuidanceComplianceRegulatoryInformation/Guidances/UCM501068.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="75C2iUaBX"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meystre</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Hurdle</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>Extracting information from textual documents in the electronic health record: a review of recent research</article-title>
          <source>Yearb Med Inform</source>
          <year>2008</year>
          <fpage>128</fpage>
          <lpage>144</lpage>
          <pub-id pub-id-type="medline">18660887</pub-id>
          <pub-id pub-id-type="pii">me08010128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Matsumura</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hattori</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Manabe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Takahashi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yamamoto</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Murata</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nakagawa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mihara</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Takeda</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Case report form reporter: a key component for the integration of electronic medical records and the electronic data capture system</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2017</year>
          <volume>245</volume>
          <fpage>516</fpage>
          <lpage>520</lpage>
          <pub-id pub-id-type="medline">29295148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El Fadly</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rance</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mead</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chatellier</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lastic</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jaulent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Daniel</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Integrating clinical research with the Healthcare Enterprise: from the RE-USE project to the EHR4CR platform</article-title>
          <source>J Biomed Inform</source>
          <year>2011</year>
          <month>12</month>
          <volume>44 Suppl 1</volume>
          <fpage>S94</fpage>
          <lpage>S102</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(11)00125-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2011.07.007</pub-id>
          <pub-id pub-id-type="medline">21888989</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(11)00125-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patrick</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>High accuracy information extraction of medication information from clinical notes: 2009 i2b2 medication extraction challenge</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <month>10</month>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>524</fpage>
          <lpage>527</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&amp;pmid=20819856"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2010.003939</pub-id>
          <pub-id pub-id-type="medline">20819856</pub-id>
          <pub-id pub-id-type="pii">17/5/524</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995676</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>DuVall</surname>
              <given-names>SL</given-names>
            </name>
          </person-group>
          <article-title>2010 i2b2/VA challenge on concepts, assertions, and relations in clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>552</fpage>
          <lpage>556</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21685143"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000203</pub-id>
          <pub-id pub-id-type="medline">21685143</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000203</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>EI</given-names>
            </name>
          </person-group>
          <article-title>A classification approach to coreference in discharge summaries: 2011 i2b2 challenge</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>897</fpage>
          <lpage>905</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22505762"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000734</pub-id>
          <pub-id pub-id-type="medline">22505762</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000734</pub-id>
          <pub-id pub-id-type="pmcid">PMC3422828</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Rumshisky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Evaluating temporal relations in clinical text: 2012 i2b2 Challenge</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>806</fpage>
          <lpage>813</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23564629"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001628</pub-id>
          <pub-id pub-id-type="medline">23564629</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001628</pub-id>
          <pub-id pub-id-type="pmcid">PMC3756273</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jagannathan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Mullett</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Arbogast</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Halbritter</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Yellapragada</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Regulapati</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bandaru</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Assessment of commercial NLP engines for medication information extraction from dictated clinical notes</article-title>
          <source>Int J Med Inform</source>
          <year>2009</year>
          <month>04</month>
          <volume>78</volume>
          <issue>4</issue>
          <fpage>284</fpage>
          <lpage>291</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2008.08.006</pub-id>
          <pub-id pub-id-type="medline">18838293</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(08)00153-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stenner</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Waitman</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>MedEx: a medication information extraction system for clinical narratives</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&amp;pmid=20064797"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M3378</pub-id>
          <pub-id pub-id-type="medline">20064797</pub-id>
          <pub-id pub-id-type="pii">17/1/19</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>QT</given-names>
            </name>
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sordo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Extracting principal diagnosis, co-morbidity and smoking status for asthma research: evaluation of a natural language processing system</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2006</year>
          <volume>6</volume>
          <fpage>30</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-6-30"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1472-6947-6-30</pub-id>
          <pub-id pub-id-type="medline">16872495</pub-id>
          <pub-id pub-id-type="pii">1472-6947-6-30</pub-id>
          <pub-id pub-id-type="pmcid">PMC1553439</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A comprehensive study of named entity recognition in Chinese clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>808</fpage>
          <lpage>814</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24347408"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-002381</pub-id>
          <pub-id pub-id-type="medline">24347408</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-002381</pub-id>
          <pub-id pub-id-type="pmcid">PMC4147609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenbloom</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Mani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A study of machine-learning-based approaches to extract clinical entities and their assertions from discharge summaries</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>601</fpage>
          <lpage>606</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21508414"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000163</pub-id>
          <pub-id pub-id-type="medline">21508414</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000163</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rink</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Harabagiu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Automatic extraction of relations between medical concepts in clinical texts</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>594</fpage>
          <lpage>600</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21846787"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000153</pub-id>
          <pub-id pub-id-type="medline">21846787</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000153</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ashish</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dahm</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Boicey</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>University of California, Irvine-Pathology Extraction Pipeline: the pathology extraction pipeline for information extraction from pathology reports</article-title>
          <source>Health Informatics J</source>
          <year>2014</year>
          <month>12</month>
          <volume>20</volume>
          <issue>4</issue>
          <fpage>288</fpage>
          <lpage>305</lpage>
          <pub-id pub-id-type="doi">10.1177/1460458213494032</pub-id>
          <pub-id pub-id-type="medline">25155030</pub-id>
          <pub-id pub-id-type="pii">1460458213494032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Automatic ICD-10 coding algorithm using an improved longest common subsequence based on semantic similarity</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>e0173410</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0173410"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0173410</pub-id>
          <pub-id pub-id-type="medline">28306739</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-40232</pub-id>
          <pub-id pub-id-type="pmcid">PMC5356997</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bedard</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrall</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Wage and test score dispersion: some international evidence</article-title>
          <source>Econ Educ Rev</source>
          <year>2003</year>
          <month>2</month>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.1016/s0272-7757(01)00060-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ratcliff</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Methods for dealing with reaction time outliers</article-title>
          <source>Psychol Bull</source>
          <year>1993</year>
          <month>11</month>
          <volume>114</volume>
          <issue>3</issue>
          <fpage>510</fpage>
          <lpage>532</lpage>
          <pub-id pub-id-type="medline">8272468</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Andrews</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>To transform or not to transform: using generalized linear mixed models to analyse reaction time data</article-title>
          <source>Front Psychol</source>
          <year>2015</year>
          <volume>6</volume>
          <fpage>1171</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.3389/fpsyg.2015.01171"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2015.01171</pub-id>
          <pub-id pub-id-type="medline">26300841</pub-id>
          <pub-id pub-id-type="pmcid">PMC4528092</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Keene</surname>
              <given-names>ON</given-names>
            </name>
          </person-group>
          <article-title>The log transformation is special</article-title>
          <source>Stat Med</source>
          <year>1995</year>
          <month>04</month>
          <day>30</day>
          <volume>14</volume>
          <issue>8</issue>
          <fpage>811</fpage>
          <lpage>819</lpage>
          <pub-id pub-id-type="medline">7644861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedrich</surname>
              <given-names>JO</given-names>
            </name>
            <name name-style="western">
              <surname>Adhikari</surname>
              <given-names>NKJ</given-names>
            </name>
            <name name-style="western">
              <surname>Beyene</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Ratio of geometric means to analyze continuous outcomes in meta-analysis: comparison to mean differences and ratio of arithmetic means using empiric data and simulation</article-title>
          <source>Stat Med</source>
          <year>2012</year>
          <month>07</month>
          <day>30</day>
          <volume>31</volume>
          <issue>17</issue>
          <fpage>1857</fpage>
          <lpage>1886</lpage>
          <pub-id pub-id-type="doi">10.1002/sim.4501</pub-id>
          <pub-id pub-id-type="medline">22438170</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwappach</surname>
              <given-names>DLB</given-names>
            </name>
            <name name-style="western">
              <surname>Taxis</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pfeiffer</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Oncology nurses' beliefs and attitudes towards the double-check of chemotherapy medications: a cross-sectional survey study</article-title>
          <source>BMC Health Serv Res</source>
          <year>2018</year>
          <month>02</month>
          <day>17</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>123</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-018-2937-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12913-018-2937-9</pub-id>
          <pub-id pub-id-type="medline">29454347</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12913-018-2937-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC5816392</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Paton</surname>
              <given-names>JY</given-names>
            </name>
          </person-group>
          <article-title>Medication errors in a paediatric teaching hospital in the UK: five years operational experience</article-title>
          <source>Arch Dis Child</source>
          <year>2000</year>
          <month>12</month>
          <volume>83</volume>
          <issue>6</issue>
          <fpage>492</fpage>
          <lpage>497</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://adc.bmj.com/cgi/pmidlookup?view=long&amp;pmid=11087283"/>
          </comment>
          <pub-id pub-id-type="medline">11087283</pub-id>
          <pub-id pub-id-type="pmcid">PMC1718567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Montague</surname>
              <given-names>EN</given-names>
            </name>
            <name name-style="western">
              <surname>Kleiner</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Winchester</surname>
              <given-names>WW</given-names>
            </name>
          </person-group>
          <article-title>Empirically understanding trust in medical technology</article-title>
          <source>Int J Industr Ergonomics</source>
          <year>2009</year>
          <month>7</month>
          <volume>39</volume>
          <issue>4</issue>
          <fpage>628</fpage>
          <lpage>634</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ergon.2009.01.004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Parasuraman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Humans and automation: use, misuse, disuse, abuse</article-title>
          <source>Hum Factors</source>
          <year>1997</year>
          <month>06</month>
          <volume>39</volume>
          <issue>2</issue>
          <fpage>230</fpage>
          <lpage>253</lpage>
          <pub-id pub-id-type="doi">10.1518/001872097778543886</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krishnankutty</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bellary</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>NBR</given-names>
            </name>
            <name name-style="western">
              <surname>Moodahadu</surname>
              <given-names>LS</given-names>
            </name>
          </person-group>
          <article-title>Data management in clinical research: an overview</article-title>
          <source>Indian J Pharmacol</source>
          <year>2012</year>
          <month>03</month>
          <volume>44</volume>
          <issue>2</issue>
          <fpage>168</fpage>
          <lpage>172</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.ijp-online.com/article.asp?issn=0253-7613;year=2012;volume=44;issue=2;spage=168;epage=172;aulast=Krishnankutty"/>
          </comment>
          <pub-id pub-id-type="doi">10.4103/0253-7613.93842</pub-id>
          <pub-id pub-id-type="medline">22529469</pub-id>
          <pub-id pub-id-type="pii">IJPharm-44-168</pub-id>
          <pub-id pub-id-type="pmcid">PMC3326906</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sorkin</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Woods</surname>
              <given-names>DD</given-names>
            </name>
          </person-group>
          <article-title>Systems with human monitors: a signal detection analysis</article-title>
          <source>Hum-Comput Interact</source>
          <year>1985</year>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>49</fpage>
          <lpage>75</lpage>
          <pub-id pub-id-type="doi">10.1207/s15327051hci0101_2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dzindolet</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Pomranky</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Beck</surname>
              <given-names>HP</given-names>
            </name>
          </person-group>
          <article-title>The role of trust in automation reliance</article-title>
          <source>Int J Hum-Comput Stud</source>
          <year>2003</year>
          <month>6</month>
          <volume>58</volume>
          <issue>6</issue>
          <fpage>697</fpage>
          <lpage>718</lpage>
          <pub-id pub-id-type="doi">10.1016/S1071-5819(03)00038-7</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
