<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i3e13554</article-id>
      <article-id pub-id-type="pmid">31407666</article-id>
      <article-id pub-id-type="doi">10.2196/13554</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Core Data Elements in Acute Myeloid Leukemia: A Unified Medical Language System–Based Semantic Analysis and Experts’ Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ulrich</surname>
            <given-names>Hannes</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Amoz</surname>
            <given-names>Liz</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>Qingyu</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lee</surname>
            <given-names>Jaehoon</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="author" id="contrib1">
          <name name-style="western">
            <surname>Holz</surname>
            <given-names>Christian</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-3752-5436</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib2">
          <name name-style="western">
            <surname>Kessler</surname>
            <given-names>Torsten</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7679-5362</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib3">
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>Martin</given-names>
          </name>
          <degrees>MSc, MD, Prof Dr</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9740-0788</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib4" corresp="yes">
          <name name-style="western">
            <surname>Varghese</surname>
            <given-names>Julian</given-names>
          </name>
          <degrees>MD, MSci</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Institute of Medical Informatics</institution>
            <institution>University of Münster</institution>
            <addr-line>Institut für Medizinische Informatik Münster</addr-line>
            <addr-line>Albert-Schweitzer-Campus 1</addr-line>
            <addr-line>Münster, 48149</addr-line>
            <country>Germany</country>
            <phone>49 2518354714</phone>
            <email>julian.varghese@uni-muenster.de</email>
          </address>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7206-3719</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
      <label>1</label>
      <institution>Institute of Medical Informatics</institution>
      <institution>University of Münster</institution>  
      <addr-line>Münster</addr-line>
      <country>Germany</country></aff>
      <aff id="aff2">
      <label>2</label>
      <institution>Department of Medicine A</institution>
      <institution>University Hospital of Münster</institution>  
      <addr-line>Münster</addr-line>
      <country>Germany</country></aff>
      <author-notes>
        <corresp>Corresponding Author: Julian Varghese 
        <email>julian.varghese@uni-muenster.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection"><season>Jul-Sep</season><year>2019</year></pub-date>
      <pub-date pub-type="epub">
        <day>12</day>
        <month>08</month>
        <year>2019</year>
      </pub-date>
      <volume>7</volume>
      <issue>3</issue>
      <elocation-id>e13554</elocation-id>
      <!--history from ojs - api-xml-->
      <history>
        <date date-type="received">
          <day>30</day>
          <month>1</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>23</day>
          <month>3</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>8</day>
          <month>5</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>31</day>
          <month>5</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Christian Holz, Torsten Kessler, Martin Dugas, Julian Varghese. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 12.08.2019.</copyright-statement>
      <copyright-year>2019</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2019/3/e13554/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>For cancer domains such as acute myeloid leukemia (AML), a large set of data elements is obtained from different institutions with heterogeneous data definitions within one patient course. The lack of clinical data harmonization impedes cross-institutional electronic data exchange and future meta-analyses.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to identify and harmonize a semantic core of common data elements (CDEs) in clinical routine and research documentation, based on a systematic metadata analysis of existing documentation models.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Lists of relevant data items were collected and reviewed by hematologists from two university hospitals regarding routine documentation and several case report forms of clinical trials for AML. In addition, existing registries and international recommendations were included. Data items were coded to medical concepts via the Unified Medical Language System (UMLS) by a physician and reviewed by another physician. On the basis of the coded concepts, the data sources were analyzed for concept overlaps and identification of most frequent concepts. The most frequent concepts were then implemented as data elements in the standardized format of the Operational Data Model by the Clinical Data Interchange Standards Consortium.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 3265 medical concepts were identified, of which 1414 were unique. Among the 1414 unique medical concepts, the 50 most frequent ones cover 26.98% of all concept occurrences within the collected AML documentation. The top 100 concepts represent 39.48% of all concepts’ occurrences. Implementation of CDEs is available on a European research infrastructure and can be downloaded in different formats for reuse in different electronic data capture systems.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Information management is a complex process for research-intense disease entities such as AML, which is associated with a large set of lab-based diagnostics and different treatment options. Our systematic UMLS-based analysis revealed the existence of a core data set, and an exemplary reusable implementation for harmonized data capture is available on an established metadata repository.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>common data elements</kwd>
        <kwd>UMLS</kwd>
        <kwd>acute myeloid leukemia</kwd>
        <kwd>medical informatics</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Medical documentation is complex and time-consuming. In routine documentation, it accounts for approximately 25% of a physician’s workload and demands as much time as direct patient care [<xref ref-type="bibr" rid="ref1">1</xref>] and even more in study cases [<xref ref-type="bibr" rid="ref2">2</xref>]. All patients with acute myeloid leukemia (AML) are to be treated within studies, following expert panel recommendations [<xref ref-type="bibr" rid="ref3">3</xref>]. The number of patients with AML is relatively low with an incidence rate of around 3.7 per 100,000 in Europe [<xref ref-type="bibr" rid="ref4">4</xref>]. The 5-year survival rate is below 50% [<xref ref-type="bibr" rid="ref4">4</xref>]. Diagnostics and therapy comprise complex, repetitive laboratory analyses of different specimens at different points in time, chemotherapy cycles and schemes, donor search and selection, stem cell transplants, immunosuppressive therapy, repetitive follow-up examinations, and ongoing monitoring throughout the years of survival. All these are performed at different sites across Germany, Europe, and worldwide, depending on the hospitals’ facilities, donor selection, study group, and others. The complexity of the documentation process is obvious. In 2016, there were 4 AML study groups in Germany, that is, the <italic>AML Kooperative Gruppe</italic>, the <italic>Deutsche Studieninitiative Leukämie</italic>, the <italic>AML Study Group</italic>, and the <italic>Ostdeutsche Studiengruppe für Hämatologie und Onkologie</italic>. The European Leukemia Network (ELN) comprises more than 60 participating study centers. In 2016, there were 85 ongoing phase II or III trials for AML for adults listed in the European Union Clinical Trials Register for Germany (236 trials for the whole of Europe).</p>
        <p>Clinical trial documentation itself is typically extensive and time-consuming [<xref ref-type="bibr" rid="ref5">5</xref>]. In clinical trials, more than 1000 items such as laboratory values, vital signs, and diagnostic tests are collected per patient [<xref ref-type="bibr" rid="ref6">6</xref>]. The number of pages in case report forms (CRFs) per trial has risen from 55 to 180 during the past years [<xref ref-type="bibr" rid="ref5">5</xref>]. Study assistants are employed to reenter routine data into study CRFs manually, although automatic comparison and transformation is technically possible with minor limitations [<xref ref-type="bibr" rid="ref7">7</xref>]. In our case, technical assistants fill out the transplant-specific forms of the German Zentrales Knochenmarkspender-Register für die Bundesrepublik Deutschland and the European Society for Blood and Marrow Transplantation (EBMT) with routine data by hand. Study data from CRFs of the Study Alliance Leukemia (SAL) are transferred into the SAL register manually. This approach is error prone. Owing to the relatively low incidence of AML, there is no quality management or certification process as it is common in other entities such as breast, prostate, colon, or other cancers.</p>
        <p>Nowadays, special documentation assistants are employed to transfer routine data into software tools such as <italic>ONDIS</italic>, which is used in the administrative district of the Kassenärztliche Vereinigung Westfalen-Lippe. Both university clinics participating in this work are situated within this district. ONDIS serves as a tool for complete case documentation and quality management for manifestations of primary solid tumors but is also used for AML as, to our knowledge, there is no other option available on the market that provides the export and transfer of data to the epidemiologic cancer registries.</p>
        <p>In 2013, Ries et al [<xref ref-type="bibr" rid="ref8">8</xref>] stated that none of the existing German cancer datasets meet clinical documentation reality, even though they were already used as a base for cancer documentation, which is required by German law. To our knowledge, there are 2 datasets implemented in Germany, one by the Gesellschaft der Epidemiologischen Krebsregister in Deutschland e.V. and the other one by the Arbeitsgemeinschaft Deutscher Tumorzentren (ADT). They were established in 2008, revised in 2014, and are under ongoing modifications. Today, there are special datasets for breast, prostate, colon, glioma, and some other cancers, but there is none for leukemia. The 2018 ADT core dataset itself does not reflect on cancers without the manifestations of primary solid tumors, such as AML. Thus, it seems that no core dataset for AML documentation exists so far.</p>
        <p>The layout and content of forms, regardless of which documentation context, organization, or medium, are mostly kept as intellectual property of the particular organization. This applies to standard forms of routine documentation in hospitals, CRFs in clinical or epidemiological studies performed by study groups, and register forms of national and international registries. They are not accessible to the public [<xref ref-type="bibr" rid="ref9">9</xref>]. In addition, the mode of documentation is varying. Patient care forms often comprise free-text elements, whereas clinical trial documentation is structured on a higher level [<xref ref-type="bibr" rid="ref2">2</xref>]. The reuse potential of information is generally higher if the original data are documented in a structured way [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>The redundancy level of documentation within different documentation contexts is high [<xref ref-type="bibr" rid="ref5">5</xref>]. Even the German Ministry of Health already recognized that large amounts of data are gathered redundantly and that cost-benefit analyses are recommendable [<xref ref-type="bibr" rid="ref12">12</xref>]. It has been shown that digitalization of paper-based forms may not only reduce the workload for physicians in their daily routine by reducing redundant documentation [<xref ref-type="bibr" rid="ref13">13</xref>] but may also generally improve the approach to structured documentation, facilitating improved accessibility, interoperability, and analysis of data [<xref ref-type="bibr" rid="ref14">14</xref>]. Ongoing studies on interoperability standards of different documentation solutions are important and valuable for standardization of structured documentation [<xref ref-type="bibr" rid="ref13">13</xref>] and secondary use of data, for example, in the scope of studies [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Structured documentation through the use of common data elements (CDEs) can improve data quality and data sharing [<xref ref-type="bibr" rid="ref18">18</xref>]. The collection of detailed information of every single AML case is essential for patient surveillance [<xref ref-type="bibr" rid="ref19">19</xref>]. Previous work already showed the benefit that can be achieved if all patients’ documentation is semantically annotated in cancers of the breast and prostate [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>The aim of this work was to search for CDEs of AML documentation in clinical routine, registries, and studies. It focuses on the methods to create and provide standards for documentation and CDEs. It extends the previous collection of key data elements for myeloid leukemia, which has undergone clinical evaluation by several hematologists [<xref ref-type="bibr" rid="ref13">13</xref>] and now focuses on specific data items for AML based on a larger dataset.</p>
        <p>A medical concept is a semantic identifier to encode the medical information that is required by the documentation of an item. The item <italic>patient performance status</italic>, for example, is encoded by the concept <italic>ECOG performance status, UMLS C1520224</italic>. By adding the type of data and possible values to the concepts, a list of CDEs is created [<xref ref-type="bibr" rid="ref20">20</xref>]. This list is usable to harmonize documentation of different contexts and to facilitate improved interoperability between health information systems.</p>
        <p>The systematic analysis is performed on a set of different forms collected by the authors and semantically enriched using Unified Medical Language System (UMLS) codes [<xref ref-type="bibr" rid="ref21">21</xref>]. The collection contains sets of AML documentation from 2 German university hospitals, international clinical AML studies performed by 3 study groups, national and international register forms, and a de facto international standard published previously by the ELN [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>On the basis of the comparison of documentation forms, the following questions are addressed:</p>
        <list list-type="order">
          <list-item>
            <p>What are the most frequently used medical concepts in AML documentation?</p>
          </list-item>
          <list-item>
            <p>To which degree do the register, routine, and clinical trial documentation represent or meet the ELN standard?</p>
          </list-item>
          <list-item>
            <p>To which extent do routine, clinical trial, and register documentation overlap?</p>
          </list-item>
          <list-item>
            <p>Do the sets of routine documentation of different hospitals differ (Bochum and Münster)? To which extent do the datasets of the registers match each other (EBMT and SAL)?</p>
          </list-item>
        </list>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection</title>
        <p>Different documentation contexts of AML were identified based on previous reports to represent a wide range of routine and research documentation on AML [<xref ref-type="bibr" rid="ref13">13</xref>], which are listed in <xref ref-type="table" rid="table1">Table 1</xref>.</p>

        <p>The collection of forms was performed between December 2015 and October 2016. A total of 2 university hospitals provided their electronic routine documentation forms and we chose 11 discharge letters—reviewed by a hematologist and deemed representative and complete regarding documentation items—out of the collection of cases of the previous 24 months. They were anonymized before the analysis started. Overall, 15 routine documentation forms such as laboratory reports, medical history, diagnostic finding, and stem cell transplant forms of both hospitals were collected and manually compared against the discharge letters. In total, 8 of them were annotated. In addition, 2 study groups from Germany and the Netherlands provided complete CRFs of 7 national or international studies. Furthermore, 3 registries of different sizes were identified via a Web-based query and by contacting the hematologist-oncologists. Their forms were collected. All right holders agreed to the analysis of forms and parts of the forms were publicly available. All documents were checked for integrity by 2 hematologist-oncologists familiar with AML therapy, documentation, and studies. <xref ref-type="table" rid="table1">Table 1</xref> shows the different documentation contexts the forms were assigned to and their numbers.</p>
          
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Documentation context and forms in each field.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="700"/>
            <thead>
              <tr valign="top">
                <td>Documentation context</td>
                <td>Number of sources</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Routine documentation</td>
                <td>11 comprehensive, representative discharge letters of 2 university hospitals (Routine BO<sup>a</sup>+Routine MS<sup>b</sup>); 15 forms of routine documentation of 2 university hospitals (8 semantically annotated)</td>
              </tr>
              <tr valign="top">
                <td>Registries</td>
                <td>2 (EBMT<sup>c</sup>, SAL<sup>d</sup>-AML<sup>e</sup>)</td>
              </tr>
              <tr valign="top">
                <td>Studies</td>
                <td>3 (all case report forms of HOVON 132<sup>f</sup>, AML-AZA<sup>g</sup>, AMLSG 21-13<sup>h</sup>)</td>
              </tr>
              <tr valign="top">
                <td>Quality measurement</td>
                <td>None (not existing)</td>
              </tr>
              <tr valign="top">
                <td>Recommendations of official associations</td>
                <td>1 (European Leukemia Network recommendations [<xref ref-type="bibr" rid="ref3">3</xref>])</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Routine BO: University Hospital Bochum-Langendreer.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Routine MS: University Hospital of Münster.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>EBMT: Register by the European Society for Blood and Marrow Transplantation.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>SAL: Study Alliance Leukemia.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>AML: acute myeloid leukemia.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>HOVON 132: Haemato Oncology Foundation for Adults in the Netherlands, Study 132.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>AML-AZA: a randomized, multi-center phase II trial to assess the efficacy of 5-azacytidine added to standard primary therapy in elderly patients with newly diagnosed AML of University Münster.</p>
            </fn>
            <fn id="table1fn8">
              <p><sup>h</sup>AMLSG 21-13: Deutsch-Österreichische Studiengruppe Akute Myeloische Leukämie, Study 21-13.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
           <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Process of creating common data elements. AML: acute myeloid leukemia; ODM: Operational Data Model; MDM: Medical Data Models; UMLS: Unified Medical Language System.</p>
            </caption>
            <graphic xlink:href="medinform_v7i3e13554_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
      </sec>
      <sec>
        <title>Data Analysis</title>
        <sec>
          <title>Semantic Form Annotation</title>
          <p>The overall process is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>. All collected documentation models (see <xref ref-type="table" rid="table1">Table 1</xref>) were mapped into the Operational Data Model (ODM), defined by the Clinical Data Interchange Standards Consortium (CDISC). The Medical Data Models Portal (MDM-Portal) [<xref ref-type="bibr" rid="ref22">22</xref>] served as a Web framework for creating ODM files using the ODM editor (University of Münster) [<xref ref-type="bibr" rid="ref6">6</xref>] to standardize the input forms and to manually add semantic codes for form items. Semantic codes were chosen from the UMLS meta-thesaurus by a medical expert, based on the existing coding principles [<xref ref-type="bibr" rid="ref23">23</xref>]. Medical concepts were manually extracted from the discharge letters, which are naturally free-text letters, and then semantically annotated with UMLS codes. As the coding principles indicate, pre- and postcoordinated codes were chosen per item. If no precoordinated code was available for a medical concept, postcoordination was considered. Items with nonmedically relevant data (eg, <italic>page number</italic>) or insignificant content such as <italic>other</italic>, <italic>specify</italic>, or <italic>further comment</italic> were ignored.</p>
         
        </sec>
        <sec>
          <title>Semiautomated Analysis</title>
          <p>The manually UMLS-coded ODM forms were uploaded to the MDM-Portal and made publicly available. A subsequent second review by a UMLS-experienced physician ensured the quality of the coded concepts. Disagreements in coding were discussed between physicians regarding coding principles [<xref ref-type="bibr" rid="ref23">23</xref>], and the frequency rate–assisted MDM-Portal ODM editor was used. The coded ODM forms were analyzed by CDEGenerator [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref24">24</xref>], an in-house implemented Java-based Web application. CDEGenerator automatically sorts medical concepts (eg, medication) of the existing data items according to their frequency (by counting identical UMLS codes) and also shows similarity of medical concepts based on the code overlaps of postcoordinated concepts, for example, <italic>medication start date</italic> is similar to <italic>medication end date</italic>, as the main concept <italic>medication</italic> is the same. An initial list of most frequent medical concepts and concept overlaps between all different forms was generated.</p>
        </sec>
        <sec>
          <title>Generation of Common Data Elements</title>
          <p>A list of most frequent medical concepts was generated by CDEGenerator by analyzing all ODM files and counting same UMLS codes. Concepts that were semantically similar (eg, birth date/age, gender/sex, and previous malignancy/tumor history) were grouped as one based on the expert’s decision. By adding to each medical concept its datatype and possible values, for example, codelist items, a medical concept also represents a data element [<xref ref-type="bibr" rid="ref20">20</xref>]. Data elements that were documented coherently (eg, systolic and diastolic blood pressure) were grouped into item groups. A data element will be added to the resulting set of CDEs if it occurs at least twice within all sources or if it is listed in the standard published by the ELN [<xref ref-type="bibr" rid="ref3">3</xref>]. The list was then checked by a medical expert to avoid any redundancies or important missing medical concepts. All CDEs and item groups were then mapped to documentation categories and implemented as standardized CDISC-ODM files and uploaded to the MDM-Portal for scientific discussions and reuse.</p>
        </sec>
        <sec>
          <title>Pairwise Comparison of Documentation Contexts</title>
          <p>The pairwise comparison of different documentation contexts can be made on different bases: (1) the comparison of different contexts such as routine and clinical trial documentation with each other; (2) the comparison of different sources of the same context, such as routine documentation of different origins/hospitals; (3) the overlap between the ELN standard and a combination of other contexts such as routine and clinical trial merged together.</p>
          <p>CDEGenerator was used to identify common concepts of different sources or contexts and to output percentages of overlapping concepts.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
      <title>Overview</title>
      <p>To identify a semantic core of frequently used medical concepts in routine and research documentation of AML, a total of 3265 medical concept occurrences were identified, of which 3245 could be UMLS-coded (99.38%). After review by a second UMLS-experienced physician, 27 concepts (0.83%) were given different UMLS codes upon consensus decision. Among all concept occurrences, 1414 were unique medical concepts. The next section provides details on the frequency of concept occurrences.</p></sec>
      <sec>
        <title>Cumulative Frequencies</title>
        <p>Among 1414 unique medical concepts, the 50 most frequent medical concepts cover 26.98% of all concept occurrences within the collected AML documentation. The top 100 concepts represent 39.48% of all concept occurrences. <xref ref-type="fig" rid="figure2">Figure 2</xref> shows the cumulative frequencies.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Cumulative frequency coverage of all different concepts. The 50 most common concepts cover about 27% of all concept occurrences, and the 100 most frequent concepts cover about 39.5% of all concept occurrences.</p>
          </caption>
          <graphic xlink:href="medinform_v7i3e13554_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Unified Medical Language System Terminology and Acute Myeloid Leukemia</title>
        <p>For about 1% (n=20) of the relevant medical concepts, no adequate UMLS code could be assigned, such as for the following codelist items: <italic>matched related donor</italic>, <italic>matched unrelated donor</italic>, <italic>mismatched unrelated donor</italic>, <italic>HLA identical sibling</italic>, <italic>HLA identical parent</italic>, and <italic>2 or more antigen mismatched related donor</italic> (all belonging to bone marrow transplantation donors). Concerning graft-versus-host disease status, items such as <italic>resolved to baseline</italic>, <italic>resolved with sequelae</italic>, and <italic>ongoing with higher CTCAE grade</italic> were missing. Owing to the complexity of these concepts, postcoordination for these concepts was not applied to avoid information loss. In addition, certain AML-specific vocabulary is also missing—or may be underrepresented—in the UMLS terminology. The <italic>WHO tumor classification</italic>, for example, has a UMLS code but not the <italic>WHO AML classification</italic>. The following concepts were also missing in the UMLS databases at the time of the research: <italic>EBMT risk score</italic>, <italic>clusters of blasts</italic>, <italic>−7q/7q mutation</italic>, and <italic>Hematopoietic Cell Transplantation-Comorbidity Index (HCT-CI)</italic>. Some medical concepts have 2 different codes, such as <italic>C1516728</italic> <italic>—</italic> <italic>Common Terminology Criteria for Adverse Events</italic> and <italic>C3888020</italic> <italic>—</italic> <italic>Common Terminology Criteria for Adverse Events</italic>, even though the same concepts are meant.</p>
      </sec>
      <sec>
        <title>Generation of Common Data Elements</title>
        <p>The generation of CDEs was realized by counting absolute frequencies of UMLS codes over all collected and annotated forms. Items represented in at least 2 different sources were added to the list of CDEs. UMLS codes found only in 1 single documentation source were excluded, even if used repeatedly there. <xref ref-type="fig" rid="figure3">Figure 3</xref> provides an overview of documentation categories. All CDEs were implemented as CDISC-ODM files and are available with open access on the MDM-Portal. The portal provides a number of conversions such as to REDCap (Research Electronic Data Capture) models and HL7 FHIR (Health Level Seven Fast Healthcare Interoperability Resource) questionnaires [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
        <p>We could show that the CDEs appeared in all medical categories throughout the patient therapy course. CDEs exist from the beginning to end of therapy (<xref ref-type="fig" rid="figure3">Figure 3</xref>).</p>
        <p>The most frequently used concept of all documentation contexts is <italic>disease response</italic>. <xref ref-type="table" rid="table2">Table 2</xref> shows a list of the 20 most frequent CDEs relevant for AML therapy, their subconcepts, absolute concept frequency, and the documentation contexts in which the concepts are represented.</p>
        <p>The top 30 laboratory concepts are presented separately in <xref ref-type="table" rid="table3">Table 3</xref>, analogous to <xref ref-type="table" rid="table2">Table 2</xref>. Unspecific data elements have been manually filtered, for example, <italic>patient birth date</italic>, <italic>gender</italic>, and <italic>patient name</italic>. A complete list of all concepts is found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Implementation of data elements according to Clinical Data Interchange Standards Consortium–Operational Data Model format is available in [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
         <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Documentation landscape of the common data elements (CDEs) of acute myeloid leukemia patients. Each circle represents a documentation category of the CDEs. The area of a circle corresponds to the number of data elements in that category. For example, there are 45 data elements within the laboratory blood panel, which represents the largest documentation category. A total of 212 CDEs were identified. App.-based diagn.: Apparatus-based diagnostics (eg, ultrasound and electrocardiogram).</p>
          </caption>
          <graphic xlink:href="medinform_v7i3e13554_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Top 20 of the most frequent concepts sorted by absolute concept frequency.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="440"/>
            <col width="230"/>
            <col width="60"/>
            <col width="70"/>
            <col width="70"/>
            <col width="60"/>
            <col width="70"/>
            <thead>
              <tr valign="bottom">
                <td>Concept and subconcepts</td>
                <td>Documentation category</td>
                <td>ACF<sup>a</sup></td>
                <td colspan="4">Documentation context</td>
              </tr>
              <tr valign="top">
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
                <td>Routine</td>
                <td>Register</td>
                <td>Study</td>
                <td>ELN<sup>b</sup> standard</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Disease response/remission status: Complete remission; Complete remission with incomplete hematologic recovery; Partial response; Complete remission cytogenetic; Complete remission molecular; Resistant disease; Partial remission recurrence/relapse; and death in aplasia</td>
                <td>Treatment details</td>
                <td>42</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Treatment status: number of therapies since the last visit; treatment outside of a study, palliative+after end of treatment; did patient start protocol treatment; cycle treatment/action taken; current therapy; additional therapies since last follow-up; treatment given since last report; disease treatment (apart from donor cell infusion or other type of cell therapy); treatment for disease; and planned (planned before HSCT<sup>c</sup> took place)+current therapy</td>
                <td>Treatment details</td>
                <td>24</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>Adverse event: adverse event; adverse event number; adverse event indicator; and description of adverse event</td>
                <td>Treatment details</td>
                <td>16</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Platelet engraftment: date of engraftment; platelets self-sustaining; and platelets &gt;x mg/dL</td>
                <td>Treatment details</td>
                <td>12</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Neutrophil engraftment: date of engraftment; neutrophil self-sustaining; and neutrophils &gt;x mg/dL at day</td>
                <td>Treatment details</td>
                <td>11</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Chemotherapy cycle</td>
                <td>Treatment details</td>
                <td>12</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Concomitant medication</td>
                <td>Treatment details</td>
                <td>11</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Diagnosis: WHO<sup>e</sup> classification; FAB<sup>f</sup> classification; date of diagnosis; and first diagnosis</td>
                <td>Physical examination/follow up</td>
                <td>27</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Patient performance status: Karnofsky index and ECOG<sup>g</sup> performance status</td>
                <td>Physical examination/follow up</td>
                <td>19</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Concomitant disease/comorbidity: comorbidity; baseline concomitant diseases; and concurrent severe and/or uncontrolled condition</td>
                <td>Physical examination/follow up</td>
                <td>17</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Second malignancy/other tumor: previous tumor disease in history; preexisting solid tumor (chemotherapy required); secondary malignancy; and second primary malignancy</td>
                <td>Physical examination/follow up</td>
                <td>16</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Cause of death</td>
                <td>Physical examination/follow up</td>
                <td>10</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Diagnosis date</td>
                <td>Physical examination/follow up</td>
                <td>13</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Survival status: alive; dead; and unknown (lost to follow-up)</td>
                <td>Physical examination/follow up</td>
                <td>12</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Extramedullary manifestation of disease</td>
                <td>Physical examination/follow up</td>
                <td>15</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Pregnancy</td>
                <td>Physical examination/follow up</td>
                <td>12</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Drug toxicity</td>
                <td>Physical examination/follow up</td>
                <td>12</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>HSCT details: HSCT-indicator; HSCT-type; date of transplantation; relation to donor; and chimerism</td>
                <td>Bone marrow transplant</td>
                <td>16</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Previous chemotherapy/radiotherapy, antineoplastic protocols: year of chemotherapy/radiotherapy; chemotherapy medication; and radiotherapy specification</td>
                <td>Medical history</td>
                <td>11</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Concomitant medication</td>
                <td>Treatment details</td>
                <td>11</td>
                <td>—</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
                      <fn id="table2fn1">
              <p><sup>a</sup>ACF: absolute concept frequency; n=1057.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>ELN: European Leukemia Network.</p>
            </fn>

            <fn id="table2fn3">
              <p><sup>c</sup>HSCT: hematopoietic stem cell transplantation.</p>
            </fn>
                        <fn id="table2fn4">
              <p><sup>d</sup>Data element is not represented in the documentation context.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>WHO: World Health Organization.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>FAB: French-American-British classification.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>ECOG: Eastern Cooperative Oncology Group.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Top 30 of the most frequent laboratory concepts sorted by absolute concept frequency.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="400"/>
            <col width="60"/>
            <col width="70"/>
            <col width="70"/>
            <col width="60"/>
            <col width="70"/>
            <thead>
              <tr valign="bottom">
                <td>Concept and subconcepts</td>
                <td>Documentation category</td>
                <td>ACF<sup>a</sup></td>
                <td colspan="4">Documentation context</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td>Routine</td>
                <td>Register</td>
                <td>Study</td>
                <td>ELN<sup>b</sup> standard</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Platelets blood level</td>
                <td>Laboratory: blood panel</td>
                <td>13</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Bilirubin blood level</td>
                <td>Laboratory: blood panel</td>
                <td>13</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Platelets blood level</td>
                <td>Laboratory: blood panel</td>
                <td>13</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>White blood count / leukocytes</td>
                <td>Laboratory: blood panel</td>
                <td>12</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>GPT<sup>c</sup></td>
                <td>Laboratory: blood panel</td>
                <td>11</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Blood group</td>
                <td>Laboratory: blood panel</td>
                <td>11</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>Serum creatinine</td>
                <td>Laboratory: blood panel</td>
                <td>10</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Lactate dehydrogenase</td>
                <td>Laboratory: blood panel</td>
                <td>9</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>INR<sup>e</sup>/Quick</td>
                <td>Laboratory: blood panel</td>
                <td>9</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Hemoglobin</td>
                <td>Laboratory: blood panel</td>
                <td>9</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>aPTT<sup>f</sup></td>
                <td>Laboratory: blood panel</td>
                <td>7</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Alkaline phosphatase</td>
                <td>Laboratory: blood panel</td>
                <td>7</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>GOT<sup>g</sup></td>
                <td>Laboratory: blood panel</td>
                <td>7</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Uric acid</td>
                <td>Laboratory: blood panel</td>
                <td>7</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Cytogenetic examinations</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>13</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Blast cells/blast</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>15</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Bone marrow examination<sup>h</sup></td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>13</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Monocytes</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>11</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Lymphocytes</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>10</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>CD34<sup>i</sup> positivity</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>10</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Auer rods</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>9</td>
                <td>—</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Clusters of blasts</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>9</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Karyotype</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>8</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Eosinophils</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>8</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Basophils</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>7</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Promyelocytes</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>7</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Metamyelocytes</td>
                <td>Laboratory: cytology/cytogenetics/cytochemistry</td>
                <td>7</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>CMV<sup>j</sup> positivity</td>
                <td>Laboratory: infectiology</td>
                <td>10</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Epstein-Barr virus positivity</td>
                <td>Laboratory: infectiology</td>
                <td>8</td>
                <td>✓</td>
                <td>✓</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Urine protein</td>
                <td>Laboratory: urinalysis</td>
                <td>7</td>
                <td>✓</td>
                <td>—</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>ACF: absolute concept frequency.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>ELN: European Leukemia Network.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>GPT: glutamate pyruvate transaminase.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>Data element is not represented in the documentation context.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>INR: international normalized ratio.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>aPTT: activated partial thromboplastin time.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>GOT: glutamic oxaloacetic transaminase.</p>
            </fn>
            <fn id="table3fn8">
              <p><sup>h</sup>Subconcepts: bone marrow puncture; bone marrow sample; bone marrow sampling date; and bone marrow examination possible.</p>
            </fn>
            <fn id="table3fn9">
              <p><sup>i</sup>CD34: cluster of differentiation 34.</p>
            </fn>
            <fn id="table3fn10">
              <p><sup>j</sup>CMV: cytomegalovirus.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
       
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Overlaps of pairwise documentation contexts (A,B).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="240"/>
            <col width="90"/>
            <col width="210"/>
            <col width="90"/>
            <col width="100"/>
            <col width="150"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td><italic>A</italic></td>
                <td>|<italic>A</italic>|</td>
                <td><italic>B</italic></td>
                <td>|<italic>B</italic>|</td>
                <td>|A ∩ B|</td>
                <td>|A ∩ B| / |A|, %</td>
                <td>|A ∩ B| / |B|, %</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Clinical trial documentation</td>
                <td>752</td>
                <td>Routine documentation</td>
                <td>250</td>
                <td>116</td>
                <td>15.43</td>
                <td>46.40</td>
              </tr>
              <tr valign="top">
                <td>Clinical trial documentation</td>
                <td>752</td>
                <td>Registries</td>
                <td>428</td>
                <td>117</td>
                <td>15.56</td>
                <td>27.34</td>
              </tr>
              <tr valign="top">
                <td>Clinical trial documentation</td>
                <td>752</td>
                <td>ELN<sup>a</sup> standard</td>
                <td>154</td>
                <td>70</td>
                <td>9.31</td>
                <td>45.45</td>
              </tr>
              <tr valign="top">
                <td>ELN standard</td>
                <td>154</td>
                <td>Routine documentation</td>
                <td>250</td>
                <td>46</td>
                <td>29.87</td>
                <td>18.40</td>
              </tr>
              <tr valign="top">
                <td>ELN standard</td>
                <td>154</td>
                <td>Registries</td>
                <td>428</td>
                <td>36</td>
                <td>23.38</td>
                <td>8.41</td>
              </tr>
              <tr valign="top">
                <td>Registries</td>
                <td>428</td>
                <td>Routine documentation</td>
                <td>250</td>
                <td>83</td>
                <td>19.39</td>
                <td>33.20</td>
              </tr>
              <tr valign="top">
                <td>Routine Bochum</td>
                <td>112</td>
                <td>Routine Münster</td>
                <td>138</td>
                <td>106</td>
                <td>94.64</td>
                <td>76.81</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>ELN: European Leukemia Network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Overlap Analysis for Pairwise Comparison of Documentation Contexts</title>
        <p><xref ref-type="table" rid="table4">Table 4</xref> shows the result of the overlap analysis. Routine documentation (250 unique concepts), clinical trial documentation (752 unique concepts), registries (428 unique concepts), and ELN standard (154 unique concepts) are compared and show an overlap of 9% to 46%.</p>
        
      </sec>
      <sec>
        <title>Comparison of Routine and Clinical Trial Documentation</title>
        <p>The clinical trial documentation comprises 752 different medical concepts, whereas the routine documentation comprises 250 concepts. Furthermore, 46.4% of the items in the routine documentation are also found in clinical trial documentation. Naturally, items such as <italic>study site identifier/hospital ID</italic> UMLS code C2825164 are found in study and register documentation but not in routine documentation. More therapy-specific items, such as <italic>adverse event</italic> C0877248, are concepts that can only be found in clinical trial documentation. Meanwhile, the existence of an <italic>extramedullary manifestation</italic> C1868812 is naturally of substantial medical interest and can therefore be found in all documentation areas. <italic>EBV-positivity</italic> C0014644, <italic>toxoplasmose-positivity</italic> C0040558, or <italic>CRP</italic> C0201657 were relevant in routine documentations of both university hospitals but in none of the included CRFs of clinical trials.</p>
      </sec>
      <sec>
        <title>Clinical Trial Documentation and Registries</title>
        <p>The registries analyzed in this work used 428 different concepts. The overlap of clinical trials documentation (752) and registries is 15.5% relating to clinical trial documentation and 27.3% relating to registries. Nearly one-third of the registries’ data can be found in the clinical trial documentation. <italic>Concomitant medication</italic> C2347852 is relevant for all clinical trials but not mentioned in registries. Again, <italic>EBV-positivity</italic> C0014644 is found in all registries and in routine documentation but in none of the studies.</p>
      </sec>
      <sec>
        <title>Comparison of European Leukemia Network Standard With Registries</title>
        <p>By comparing the registries (428) with the ELN standard (154), overlaps of 8.4% with regard to registries and 23.3% with regard to the ELN standard were found. This was the lowest overlap found for all analyses performed in this study. Administrative and organizational items are missing in the ELN standard. Examinations are often only mentioned in the standard, but their detailed medical concepts are not all listed, for example, <italic>hemoglobin</italic> C0019046 can be found in all documentation fields but not the ELN standard. This also applies to entries regarding the therapy. Registries are mainly focused on the long-term aspects of the disease such as etiology or outcome/follow-up and much less on specific therapy-relevant lab parameters. Concepts such as blood hemoglobin concentration are not mentioned in registries but are of high importance in diagnostics and therapy of the disease.</p>
      </sec>
      <sec>
        <title>Comparison of Routine Documentation of 2 Hospitals</title>
        <p>Finally, the routine documentations of the University Hospital Bochum-Langendreer and the University Hospital of Münster were compared, and routine documentation consisted of 112 and 138 medical concepts, respectively. The overlap of both is 94.6% and 76.8%, respectively. This amounts to the highest overlap of all analyses of this study. Items such as C0019196 and C0019159, which represent hepatitis C/A positivity, were part of only one of the 2 hospitals’ routine documentation. The same applies to <italic>D-dimer</italic> C2826333, <italic>blood gas analysis</italic> C0005800, or <italic>chloride</italic> C0008203.</p>
      </sec>
      <sec>
        <title>Comparison of Clinical Trials and the European Leukemia Network Standard</title>
        <p>Nearly half of the medical concepts of the international standard are found in the documentation of clinical trials. The clinical trial documentation consists of more than 700 medical concepts, 4 times more than the European Leukemia Network standard of around 150 medical concepts.</p>
      </sec>
      <sec>
        <title>Comparison of the European Leukemia Network Standard and Routine Documentation</title>
        <p>In the routine documentation, around one-third of the items of the ELN standard are represented. One-fifth of the routine documentation items are found in the ELN standard. For instance, <italic>date of birth/age</italic> C1704632 is mentioned in both routine documentation and the ELN standard. <italic>Blood group</italic> C0005810, <italic>weight</italic> C0005910, and <italic>magnesium</italic> C0364745 are mentioned in routine documentation but not in the ELN standard. <italic>t(v;11)(v;q23) mutation</italic> C1515810, <italic>nonspecific esterase</italic> C0054741, or <italic>prior exposure to toxic agents</italic> C0014412 are found in the standard but not in routine documentation.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Documentation of AML is complex and time-consuming. The neoplastic disease has complex therapy options, a sophisticated chemotherapy regimen, and often the need for preparation and performance of stem cell transplantation. In addition, there is a need for matching cancer documentation guidelines and recommendations by law in Germany. The fact that most patients are treated within studies leads to further documentation arms. Different health care institutions are involved in the documentation process. The detailed analysis performed in this study could clearly show that the content of AML documentation is often quite redundant. Clinical trial documentation and routine documentation overlap by 42.6%. By establishing interfaces between those documentation contexts, information once gathered could be automatically synced. This clearly reduces the documentation effort. Across all documentation contexts in AML, a basic dataset of 50 CDEs was found to account for 43.7% of all different medical concepts used. This relatively small number of items could be used as a core dataset. Reusing this semantically annotated dataset would reduce redundancy and costs if it were made available to all documentation fields for automatic export. In practice, a dynamic database continuously updated with the most recent values of the CDEs could become a source for automatic extraction of elements for other documentation arms such as registries, clinical trial documentation, and others. As a small practical example, requesting therapeutic drug levels could work in just 1 click. Today, it is often necessary to fill out forms with <italic>patient weight</italic>, <italic>age</italic>, <italic>gender</italic>, and <italic>kidney test values</italic> manually. On a large scale, high percentages of clinical trial documentation could be filled out automatically. Imagine your mobile phone’s autocomplete/word completion functionality.
It enables you to fill out specific forms and websites faster and more conveniently by anticipating possible values and giving you the option to choose among them. Analogous case-specific completion of data in electronic health records is feasible on the basis of CDEs. At the same time, standardization and quality assurance would become easier to perform because of the transparency in documentation.</p>
        <p>We could show that the semantic annotation of nearly a whole complex medical entity is feasible, by reaching an annotation rate of more than 98%. Semantic annotations mark the distinct, clear meaning of medical documentation items. Therefore, they enhance the possibilities of data integration and exchange [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Applying statistical tools to an annotated dataset can help identify missing medical concepts or solitary ones. Solitary items might be outdated, too. As an example, in our work, the concept <italic>EBV-positivity</italic> was mentioned during routine documentation and in registries but is no longer of interest in research (study documentation). Thinking one step further, semantic annotation could open the doors for reusing data, for example, for studies with other aims (secondary use). Not only for scientific questions, but also for the daily routine of physicians, a fully annotated documentation is of practical value. Automatic generation of standardized discharge letters using dynamically filled text blocks means time-savings and improves quality and safety through structured documentation [<xref ref-type="bibr" rid="ref2">2</xref>]. An additional benefit of an annotated documentation is the good searchability, even across different languages.</p>
        <p>We noticed that blank medical forms of all documentation contexts are difficult to find and gain access to. As a strength of this work, personal contact with the authors of clinical trials, routine documentation, and registries was established and written consent for their use was obtained. A higher level of awareness of the value of blank documentation forms needs to be reached.</p>
        <p>We experienced what is known from other research: there is apparently no knowledge of the value of the blank CRFs [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      </sec>
      <sec>
        <title>Limitations and Strengths</title>
        <p>In this work, the process of extraction and annotation of items from discharge letters was performed and supervised by physicians. This ensured a high level of semantic quality of the generated data. A human medical professional can extract medical concepts out of free-text elements, tables, graphics, and other sources. Medical concepts had to be recognized, extracted, and annotated. This approach requires a lot of effort in terms of time, personal resources, and, in the end, noticeable costs.</p>
        <p>The aim of this work was to create a dataset of high quality out of routine data. As further data models with new biomarkers and other relevant concepts will arise in the future, an alternative step would be to combine our methodology with a preceding natural language processing (NLP) pipeline to automatically analyze a larger set of &gt;1000 documentation sources.</p>
        <p>Our method was to annotate medical concepts manually with a high grade of precision. The technical route to match only conceptually identical items and not similar ones could explain a lower percentage in this specific comparison of documentation contexts than expected.</p>
        <p>Our extended AML dataset has a high level of congruence to a general leukemia dataset, which has been previously published and checked by independent international hematologists for integrity and consistency [<xref ref-type="bibr" rid="ref13">13</xref>]. Previous work of Miotto and Wang [<xref ref-type="bibr" rid="ref26">26</xref>] identified 115 common possible data items in clinical trial feasibility of all studies registered on Clinicaltrials.gov based on a computational approach. Although the majority of those are found in our collection (87.8%), only 20.3% of our collection is part of Miotto and Wang’s list. None of our AML-specific laboratory items were found there, which indicates the specific focus on AML in this work.</p>
        <p>Implementation of the generated standard dataset can be used for different purposes: automatic generation of text modules in discharge letters, automated filling of cancer database forms, or any other. Comparison of the dataset with that of other entities to generate and complement a general basic clinical trial dataset could be another aim. NLP as a supplemental tool for annotating CRFs or other forms might speed up the manual annotation process [<xref ref-type="bibr" rid="ref27">27</xref>]. The quality of the annotations, if not revised manually, is of course questionable.</p>
        <p>Assigning UMLS codes to medical concepts is dependent on the personnel performing the coding (interrater agreement) and the existence of highly similar codes [<xref ref-type="bibr" rid="ref27">27</xref>]. In our case, for example, it was debated whether to annotate the procedure or the result/value: one of the coders chose C0005821 <italic>blood platelets</italic>, whereas the other chose C0032181 <italic>platelet count measurement</italic>, which was used in the end. Our dataset can serve as a base for future annotations of AML CRFs.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The lack of standardization and semantic annotation of documentation for patients with AML is obvious. A high percentage of the documentation is performed as free text, which makes reusing information impossible without a lot of effort. As our research shows, there is a high overlap of data in clinical trial and routine documentation, as well as in clinical trial and registry documentation. We identified a semantic core of data items which has been implemented in a highly structured format and can serve as a basis for harmonized and efficient data collection and secondary use.</p>
        <p>The benefits of datasets for CDEs in other entities, not only neoplastic diseases, are obvious; in particular, widespread diseases such as cardiovascular diseases, stroke, neurological disorders, and others requiring complex and/or long-term therapy can be addressed.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>List of all coded medical concepts.</p>
        <media xlink:href="medinform_v7i3e13554_app1.xlsx" xlink:title="XLSX File (Microsoft Excel File), 336KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACF</term>
          <def>
            <p>absolute concept frequency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ADT</term>
          <def>
            <p>Arbeitsgemeinschaft Deutscher Tumorzentren</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">AML</term>
          <def>
            <p>acute myeloid leukemia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">aPTT</term>
          <def>
            <p>activated partial thromboplastin time</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CD34</term>
          <def>
            <p>cluster of differentiation 34</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">CDE</term>
          <def>
            <p>common data element</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">CDISC</term>
          <def>
            <p>Clinical Data Interchange Standards Consortium</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">CMV</term>
          <def>
            <p>cytomegalovirus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">CRF</term>
          <def>
            <p>case report form</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">EBMT</term>
          <def>
            <p>European Society for Blood and Marrow Transplantation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">ECOG</term>
          <def>
            <p>Eastern Cooperative Oncology Group</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">ELN</term>
          <def>
            <p>European Leukemia Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">FAB</term>
          <def>
            <p>French-American-British classification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">GOT</term>
          <def>
            <p>glutamic oxaloacetic transaminase</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">GPT</term>
          <def>
            <p>glutamate pyruvate transaminase</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">HSCT</term>
          <def>
            <p>hematopoietic stem cell transplantation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">INR</term>
          <def>
            <p>international normalized ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">MDM</term>
          <def>
            <p>Medical Data Models</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">ODM</term>
          <def>
            <p>Operational Data Model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb22">SAL</term>
          <def>
            <p>Study Alliance Leukemia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb23">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb24">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work is funded by the German Research Foundation (Deutsche Forschungsgemeinschaft, DFG grant DU 352/11-1). The authors thank Roland Schroers and the Department for Hematology of the University Hospital Bochum-Langendreer for providing routine documentation forms.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ammenwerth</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Spötl</surname>
            <given-names>HP</given-names>
          </name>
        </person-group>
        <article-title>The time needed for clinical documentation versus direct patient care. A work-sampling analysis of physicians' activities</article-title>
        <source>Methods Inf Med</source>  
        <year>2009</year>  
        <volume>48</volume>  
        <issue>1</issue>  
        <fpage>84</fpage>  
        <lpage>91</lpage>  
        <pub-id pub-id-type="doi">10.3414/ME0569</pub-id>
        <pub-id pub-id-type="medline">19151888</pub-id>
        <pub-id pub-id-type="pii">09010084</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Krumm</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Semjonow</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Tio</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Duhme</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Bürkle</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Haier</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Breil</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>The need for harmonized structured documentation and chances of secondary use - results of a systematic analysis with automated form comparison for prostate and breast cancer</article-title>
        <source>J Biomed Inform</source>  
        <year>2014</year>  
        <month>10</month>  
        <volume>51</volume>  
        <fpage>86</fpage>  
        <lpage>99</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00093-8"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2014.04.008</pub-id>
        <pub-id pub-id-type="medline">24747879</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(14)00093-8</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Döhner</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Estey</surname>
            <given-names>EH</given-names>
          </name>
          <name name-style="western">
            <surname>Amadori</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Appelbaum</surname>
            <given-names>FR</given-names>
          </name>
          <name name-style="western">
            <surname>Büchner</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Burnett</surname>
            <given-names>AK</given-names>
          </name>
          <name name-style="western">
            <surname>Dombret</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Fenaux</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Grimwade</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Larson</surname>
            <given-names>RA</given-names>
          </name>
          <name name-style="western">
            <surname>Lo-Coco</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Naoe</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Niederwieser</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Ossenkoppele</surname>
            <given-names>GJ</given-names>
          </name>
          <name name-style="western">
            <surname>Sanz</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Sierra</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Tallman</surname>
            <given-names>MS</given-names>
          </name>
          <name name-style="western">
            <surname>Löwenberg</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Bloomfield</surname>
            <given-names>CD</given-names>
          </name>
          <collab>European LeukemiaNet</collab>
        </person-group>
        <article-title>Diagnosis and management of acute myeloid leukemia in adults: recommendations from an international expert panel, on behalf of the European LeukemiaNet</article-title>
        <source>Blood</source>  
        <year>2010</year>  
        <month>01</month>  
        <day>21</day>  
        <volume>115</volume>  
        <issue>3</issue>  
        <fpage>453</fpage>  
        <lpage>74</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bloodjournal.org/cgi/pmidlookup?view=long&amp;pmid=19880497"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1182/blood-2009-07-235358</pub-id>
        <pub-id pub-id-type="medline">19880497</pub-id>
        <pub-id pub-id-type="pii">blood-2009-07-235358</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Büchner</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Schlenk</surname>
            <given-names>RF</given-names>
          </name>
          <name name-style="western">
            <surname>Schaich</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Döhner</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Krahl</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Krauter</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Heil</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Krug</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Sauerland</surname>
            <given-names>MC</given-names>
          </name>
          <name name-style="western">
            <surname>Heinecke</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Späth</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Kramer</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Scholl</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Berdel</surname>
            <given-names>WE</given-names>
          </name>
          <name name-style="western">
            <surname>Hiddemann</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Hoelzer</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Hehlmann</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Hasford</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Hoffmann</surname>
            <given-names>VS</given-names>
          </name>
          <name name-style="western">
            <surname>Döhner</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Ehninger</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Ganser</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Niederwieser</surname>
            <given-names>DW</given-names>
          </name>
          <name name-style="western">
            <surname>Pfirrmann</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Acute myeloid leukemia (AML): different treatment strategies versus a common standard arm--combined prospective analysis by the German AML intergroup</article-title>
        <source>J Clin Oncol</source>  
        <year>2012</year>  
        <month>10</month>  
        <day>10</day>  
        <volume>30</volume>  
        <issue>29</issue>  
        <fpage>3604</fpage>  
        <lpage>10</lpage>  
        <pub-id pub-id-type="doi">10.1200/JCO.2012.42.2907</pub-id>
        <pub-id pub-id-type="medline">22965967</pub-id>
        <pub-id pub-id-type="pii">JCO.2012.42.2907</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Getz</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Protocol design trends and their effect on clinical trial performance</article-title>
        <source>RAJ Pharm</source>  
        <year>2008</year>  
        <volume>5</volume>  
        <fpage>315</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/285473087_Protocol_design_trends_and_their_effect_on_clinical_trial_performance"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Meidt</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Neuhaus</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Storck</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Varghese</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>ODMedit: uniform semantic annotation for data integration in medicine based on a public metadata repository</article-title>
        <source>BMC Med Res Methodol</source>  
        <year>2016</year>  
        <month>12</month>  
        <day>1</day>  
        <volume>16</volume>  
        <fpage>65</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-016-0164-9"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s12874-016-0164-9</pub-id>
        <pub-id pub-id-type="medline">27245222</pub-id>
        <pub-id pub-id-type="pii">10.1186/s12874-016-0164-9</pub-id>
        <pub-id pub-id-type="pmcid">PMC4888420</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Tapuria</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Bruland</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Delaney</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Kalra</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Curcin</surname>
            <given-names>V</given-names>
          </name>
        </person-group>
        <article-title>Comparison and transformation between CDISC ODM and EN13606 EHR standards in connecting EHR data with clinical trial research data</article-title>
        <source>Digit Health</source>  
        <year>2018</year>  
        <volume>4</volume>  
        <fpage>2055207618777676</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29942639"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1177/2055207618777676</pub-id>
        <pub-id pub-id-type="medline">29942639</pub-id>
        <pub-id pub-id-type="pii">10.1177_2055207618777676</pub-id>
        <pub-id pub-id-type="pmcid">PMC6016569</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ries</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Prokosch</surname>
            <given-names>HU</given-names>
          </name>
          <name name-style="western">
            <surname>Beckmann</surname>
            <given-names>MW</given-names>
          </name>
          <name name-style="western">
            <surname>Bürkle</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Single-source tumor documentation - reusing oncology data for different purposes</article-title>
        <source>Onkologie</source>  
        <year>2013</year>  
        <volume>36</volume>  
        <issue>3</issue>  
        <fpage>136</fpage>  
        <lpage>41</lpage>  
        <pub-id pub-id-type="doi">10.1159/000348528</pub-id>
        <pub-id pub-id-type="medline">23486003</pub-id>
        <pub-id pub-id-type="pii">000348528</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Jöckel</surname>
            <given-names>KH</given-names>
          </name>
          <name name-style="western">
            <surname>Friede</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Gefeller</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Kieser</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Marschollek</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ammenwerth</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Röhrig</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Knaup-Gregori</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Prokosch</surname>
            <given-names>HU</given-names>
          </name>
        </person-group>
        <article-title>Memorandum 'open metadata'. Open access to documentation forms and item catalogs in healthcare</article-title>
        <source>Methods Inf Med</source>  
        <year>2015</year>  
        <volume>54</volume>  
        <issue>4</issue>  
        <fpage>376</fpage>  
        <lpage>8</lpage>  
        <pub-id pub-id-type="doi">10.3414/ME15-05-0007</pub-id>
        <pub-id pub-id-type="medline">26108979</pub-id>
        <pub-id pub-id-type="pii">15-05-0007</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Breil</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Semjonow</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Müller-Tidow</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Fritz</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>HIS-based Kaplan-Meier plots—a single source approach for documenting and reusing routine survival information</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2011</year>  
        <month>02</month>  
        <day>16</day>  
        <volume>11</volume>  
        <fpage>11</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-11-11"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6947-11-11</pub-id>
        <pub-id pub-id-type="medline">21324182</pub-id>
        <pub-id pub-id-type="pii">1472-6947-11-11</pub-id>
        <pub-id pub-id-type="pmcid">PMC3053219</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sheehan</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Hirschfeld</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Foster</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Ghitza</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Goetz</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Karpinski</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Lang</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Moser</surname>
            <given-names>RP</given-names>
          </name>
          <name name-style="western">
            <surname>Odenkirchen</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Reeves</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Rubinstein</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Werner</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Huerta</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Improving the value of clinical research through the use of common data elements</article-title>
        <source>Clin Trials</source>  
        <year>2016</year>  
        <month>12</month>  
        <volume>13</volume>  
        <issue>6</issue>  
        <fpage>671</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27311638"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1177/1740774516653238</pub-id>
        <pub-id pub-id-type="medline">27311638</pub-id>
        <pub-id pub-id-type="pii">1740774516653238</pub-id>
        <pub-id pub-id-type="pmcid">PMC5133155</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
        <source>Federal Ministry of Health (Germany)</source>  
        <access-date>2019-01-17</access-date>
        <comment>Was haben wir bisher erreicht? 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.bundesgesundheitsministerium.de/themen/praevention/nationaler-krebsplan/was-haben-wir-bisher-erreicht.html">https://www.bundesgesundheitsministerium.de/themen/praevention/nationaler-krebsplan/was-haben-wir-bisher-erreicht.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="75UMzGToj"/></comment> </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Varghese</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Holz</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Neuhaus</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Bernardi</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Boehm</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Ganser</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Gore</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Heaney</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Hochhaus</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hofmann</surname>
            <given-names>WK</given-names>
          </name>
          <name name-style="western">
            <surname>Krug</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Müller-Tidow</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Weltermann</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>de Witte</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Hehlmann</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Key data elements in myeloid leukemia</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2016</year>  
        <volume>228</volume>  
        <fpage>282</fpage>  
        <lpage>6</lpage>  
        <pub-id pub-id-type="doi">10.3233/978-1-61499-678-1-282</pub-id>
        <pub-id pub-id-type="medline">27577388</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Missing semantic annotation in databases. The root cause for data integration and migration problems in information systems</article-title>
        <source>Methods Inf Med</source>  
        <year>2014</year>  
        <volume>53</volume>  
        <issue>6</issue>  
        <fpage>516</fpage>  
        <lpage>7</lpage>  
        <pub-id pub-id-type="doi">10.3414/ME14-04-0002</pub-id>
        <pub-id pub-id-type="medline">25377893</pub-id>
        <pub-id pub-id-type="pii">14-04-0002</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ohmann</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Kuchinke</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <article-title>Future developments of medical informatics from the viewpoint of networked clinical research. Interoperability and integration</article-title>
        <source>Methods Inf Med</source>  
        <year>2009</year>  
        <volume>48</volume>  
        <issue>1</issue>  
        <fpage>45</fpage>  
        <lpage>54</lpage>  
        <pub-id pub-id-type="medline">19151883</pub-id>
        <pub-id pub-id-type="pii">09010045</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>El Fadly</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Rance</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Lucas</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Mead</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Chatellier</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Lastic</surname>
            <given-names>PY</given-names>
          </name>
          <name name-style="western">
            <surname>Jaulent</surname>
            <given-names>MC</given-names>
          </name>
          <name name-style="western">
            <surname>Daniel</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Integrating clinical research with the healthcare enterprise: from the RE-USE project to the EHR4CR platform</article-title>
        <source>J Biomed Inform</source>  
        <year>2011</year>  
        <month>12</month>  
        <volume>44</volume>  
        <issue>Suppl 1</issue>  
        <fpage>S94</fpage>  
        <lpage>102</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(11)00125-0"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2011.07.007</pub-id>
        <pub-id pub-id-type="medline">21888989</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(11)00125-0</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Green</surname>
            <given-names>AK</given-names>
          </name>
          <name name-style="western">
            <surname>Reeder-Hayes</surname>
            <given-names>KE</given-names>
          </name>
          <name name-style="western">
            <surname>Corty</surname>
            <given-names>RW</given-names>
          </name>
          <name name-style="western">
            <surname>Basch</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Milowsky</surname>
            <given-names>MI</given-names>
          </name>
          <name name-style="western">
            <surname>Dusetzina</surname>
            <given-names>SB</given-names>
          </name>
          <name name-style="western">
            <surname>Bennett</surname>
            <given-names>AV</given-names>
          </name>
          <name name-style="western">
            <surname>Wood</surname>
            <given-names>WA</given-names>
          </name>
        </person-group>
        <article-title>The project data sphere initiative: accelerating cancer research by sharing data</article-title>
        <source>Oncologist</source>  
        <year>2015</year>  
        <month>05</month>  
        <volume>20</volume>  
        <issue>5</issue>  
        <fpage>464</fpage>  
        <lpage>e20</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://theoncologist.alphamedpress.org/cgi/pmidlookup?view=long&amp;pmid=25876994"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1634/theoncologist.2014-0431</pub-id>
        <pub-id pub-id-type="medline">25876994</pub-id>
        <pub-id pub-id-type="pii">theoncologist.2014-0431</pub-id>
        <pub-id pub-id-type="pmcid">PMC4425388</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Schiariti</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Fowler</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Brandenburg</surname>
            <given-names>JE</given-names>
          </name>
          <name name-style="western">
            <surname>Levey</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>McIntyre</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Sukal-Moulton</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Ramey</surname>
            <given-names>SL</given-names>
          </name>
          <name name-style="western">
            <surname>Rose</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Sienko</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Stashinko</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Vogtle</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Feldman</surname>
            <given-names>RS</given-names>
          </name>
          <name name-style="western">
            <surname>Koenig</surname>
            <given-names>JI</given-names>
          </name>
        </person-group>
        <article-title>A common data language for clinical research studies: the National Institute of Neurological Disorders and Stroke and American Academy for Cerebral Palsy and Developmental Medicine Cerebral Palsy Common Data Elements version 1.0 recommendations</article-title>
        <source>Dev Med Child Neurol</source>  
        <year>2018</year>  
        <month>10</month>  
        <volume>60</volume>  
        <issue>10</issue>  
        <fpage>976</fpage>  
        <lpage>86</lpage>  
        <pub-id pub-id-type="doi">10.1111/dmcn.13723</pub-id>
        <pub-id pub-id-type="medline">29542813</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Visser</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Trama</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Maynadié</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Stiller</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Marcos-Gragera</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>de Angelis</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Mallone</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Tereanu</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Allemani</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Ricardi</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Schouten</surname>
            <given-names>HC</given-names>
          </name>
          <collab>RARECARE Working Group</collab>
        </person-group>
        <article-title>Incidence, survival and prevalence of myeloid malignancies in Europe</article-title>
        <source>Eur J Cancer</source>  
        <year>2012</year>  
        <month>11</month>  
        <volume>48</volume>  
        <issue>17</issue>  
        <fpage>3257</fpage>  
        <lpage>66</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.ejca.2012.05.024</pub-id>
        <pub-id pub-id-type="medline">22770878</pub-id>
        <pub-id pub-id-type="pii">S0959-8049(12)00469-8</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
        <source>National Library of Medicine - National Institutes of Health</source>  
        <access-date>2019-01-17</access-date>
        <comment>What is a CDE? 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/cde/glossary.html#cdedefinition">https://www.nlm.nih.gov/cde/glossary.html#cdedefinition</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="75UNGovFz"/></comment> </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
        <source>UMLS Terminology Services</source>  
        <access-date>2019-04-15</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://uts.nlm.nih.gov/home.html">https://uts.nlm.nih.gov/home.html</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="77eJK1zGY"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Neuhaus</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Meidt</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Doods</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Storck</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Bruland</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Varghese</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Portal of medical data models: information infrastructure for medical research and healthcare</article-title>
        <source>Database (Oxford)</source>  
        <year>2016</year>  
        <volume>2016</volume>  
        <fpage>bav121</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26868052"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1093/database/bav121</pub-id>
        <pub-id pub-id-type="medline">26868052</pub-id>
        <pub-id pub-id-type="pii">bav121</pub-id>
        <pub-id pub-id-type="pmcid">PMC4750548</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Varghese</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Frequency analysis of medical concepts in clinical trials and their coverage in MeSH and SNOMED-CT</article-title>
        <source>Methods Inf Med</source>  
        <year>2015</year>  
        <volume>54</volume>  
        <issue>1</issue>  
        <fpage>83</fpage>  
        <lpage>92</lpage>  
        <pub-id pub-id-type="doi">10.3414/ME14-01-0046</pub-id>
        <pub-id pub-id-type="medline">25346408</pub-id>
        <pub-id pub-id-type="pii">14-01-0046</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Varghese</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Fujarski</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Hegselmann</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Neuhaus</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>CDEGenerator: an online platform to learn from existing data models to build model registries</article-title>
        <source>Clin Epidemiol</source>  
        <year>2018</year>  
        <volume>10</volume>  
        <fpage>961</fpage>  
        <lpage>70</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.2147/CLEP.S170075"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2147/CLEP.S170075</pub-id>
        <pub-id pub-id-type="medline">30127646</pub-id>
        <pub-id pub-id-type="pii">clep-10-961</pub-id>
        <pub-id pub-id-type="pmcid">PMC6089100</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Holz</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Common data elements for acute myeloid leukemia</article-title>
        <source>The Medical Data Models Portal</source>  
        <year>2018</year>  
        <pub-id pub-id-type="doi">10.21961/mdm:31429</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Miotto</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Weng</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Unsupervised mining of frequent tags for clinical eligibility text indexing</article-title>
        <source>J Biomed Inform</source>  
        <year>2013</year>  
        <month>12</month>  
        <volume>46</volume>  
        <issue>6</issue>  
        <fpage>1145</fpage>  
        <lpage>51</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00140-8"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2013.08.012</pub-id>
        <pub-id pub-id-type="medline">24036004</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(13)00140-8</pub-id>
        <pub-id pub-id-type="pmcid">PMC3843986</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lingren</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Deleger</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Molnar</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Zhai</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Meinzen-Derr</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Kaiser</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Stoutenborough</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Solti</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <article-title>Evaluating the impact of pre-annotation on annotation speed and potential bias: natural language processing gold standard development for clinical named entity recognition in clinical trial announcements</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2014</year>  
        <volume>21</volume>  
        <issue>3</issue>  
        <fpage>406</fpage>  
        <lpage>13</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24001514"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001837</pub-id>
        <pub-id pub-id-type="medline">24001514</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2013-001837</pub-id>
        <pub-id pub-id-type="pmcid">PMC3994857</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
