<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">46725</article-id><article-id pub-id-type="doi">10.2196/46725</article-id><title-group><article-title>A Pragmatic Method to Integrate Data From Preexisting Cohort Studies Using the Clinical Data Interchange Standards Consortium (CDISC) Study Data Tabulation Model: Case Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Matsuzaki</surname><given-names>Keiichi</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kitayama</surname><given-names>Megumi</given-names></name><degrees>RN, MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yamamoto</surname><given-names>Keiichi</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Aida</surname><given-names>Rei</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Imai</surname><given-names>Takumi</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ishida</surname><given-names>Mami</given-names></name><degrees>DPH, MD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Katafuchi</surname><given-names>Ritsuko</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff7">7</xref><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kawamura</surname><given-names>Tetsuya</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff9">9</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yokoo</surname><given-names>Takashi</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff9">9</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Narita</surname><given-names>Ichiei</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff10">10</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Suzuki</surname><given-names>Yusuke</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff11">11</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Public Health, School of Medicine, Kitasato University</institution>, <addr-line>Sagamihara</addr-line>, <country>Japan</country></aff><aff id="aff2"><institution>Clinical Study Support Center, Wakayama Medical University Hospital</institution>, <addr-line>Wakayama</addr-line>, <country>Japan</country></aff><aff id="aff3"><institution>Translational Research Institute for Medical Innovation, Osaka Dental University</institution>, <addr-line>Osaka</addr-line>, <country>Japan</country></aff><aff id="aff4"><institution>Department of Medical 
Statistics, Osaka Metropolitan University</institution>, <addr-line>Osaka</addr-line>, <country>Japan</country></aff><aff id="aff5"><institution>Clinical &#x0026; Translational Research Center, Kobe University Hospital</institution>, <addr-line>Kobe</addr-line>, <country>Japan</country></aff><aff id="aff6"><institution>Department of Medical Informatics and Clinical Epidemiology, Kyoto Prefectural University of Medicine</institution>, <addr-line>Kyoto</addr-line>, <country>Japan</country></aff><aff id="aff7"><institution>Kidney Unit, National Hospital Organization Fukuokahigashi Medical Center</institution>, <addr-line>Fukuoka</addr-line>, <country>Japan</country></aff><aff id="aff8"><institution>Kidney Unit, Medical Corporation Houshikai Kano Hospital</institution>, <addr-line>Fukuoka</addr-line>, <country>Japan</country></aff><aff id="aff9"><institution>Division of Kidney and Hypertension, Department of Internal Medicine, Jikei University School of Medicine</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country></aff><aff id="aff10"><institution>Division of Clinical Nephrology and Rheumatology, Graduate School of Medical and Dental Sciences, Niigata University</institution>, <addr-line>Niigata</addr-line>, <country>Japan</country></aff><aff id="aff11"><institution>Department of Nephrology, Faculty of Medicine, Juntendo University</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Klann</surname><given-names>Jeffrey</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Gao</surname><given-names>Aijing</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Loban</surname><given-names>Amanda</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Hume</surname><given-names>Sam</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Keiichi Matsuzaki, MD, PhD<email>matsuzaki.keiichi@kitasato-u.ac.jp</email></corresp></author-notes><pub-date pub-type="collection"><year>2023</year></pub-date><pub-date pub-type="epub"><day>21</day><month>12</month><year>2023</year></pub-date><volume>11</volume><elocation-id>e46725</elocation-id><history><date date-type="received"><day>11</day><month>03</month><year>2023</year></date><date date-type="rev-recd"><day>13</day><month>09</month><year>2023</year></date><date date-type="accepted"><day>14</day><month>09</month><year>2023</year></date></history><copyright-statement>&#x00A9; Keiichi Matsuzaki, Megumi Kitayama, Keiichi Yamamoto, Rei Aida, Takumi Imai, Mami Ishida, Ritsuko Katafuchi, Tetsuya Kawamura, Takashi Yokoo, Ichiei Narita, Yusuke Suzuki. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 21.12.2023. </copyright-statement><copyright-year>2023</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2023/1/e46725"/><abstract><sec><title>Background</title><p>In recent years, many researchers have focused on the use of legacy data, such as pooled analyses that collect and reanalyze data from multiple studies. However, the methodology for the integration of preexisting databases whose data were collected for different purposes has not been established. Previously, we developed a tool to efficiently generate Study Data Tabulation Model (SDTM) data from hypothetical clinical trial data using the Clinical Data Interchange Standards Consortium (CDISC) SDTM.</p></sec><sec><title>Objective</title><p>This study aimed to design a practical model for integrating preexisting databases using the CDISC SDTM.</p></sec><sec sec-type="methods"><title>Methods</title><p>Data integration was performed in three phases: (1) the confirmation of the variables, (2) SDTM mapping, and (3) the generation of the SDTM data. In phase 1, the definitions of the variables in detail were confirmed, and the data sets were converted to a vertical structure. In phase 2, the items derived from the SDTM format were set as mapping items. Three types of metadata (domain name, variable name, and test code), based on the CDISC SDTM, were embedded in the Research Electronic Data Capture (REDCap) field annotation. In phase 3, the data dictionary, including the SDTM metadata, was outputted in the Operational Data Model (ODM) format. Finally, the mapped SDTM data were generated using REDCap2SDTM version 2.</p></sec><sec sec-type="results"><title>Results</title><p>SDTM data were generated as a comma-separated values file for each of the 7 domains defined in the metadata. 
A total of 17 items were commonly mapped to 3 databases. Because the SDTM data were set in each database correctly, we were able to integrate 3 independent preexisting databases into 1 database in the CDISC SDTM format.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our project suggests that the CDISC SDTM is useful for integrating multiple preexisting databases.</p></sec></abstract><kwd-group><kwd>data warehousing</kwd><kwd>data management</kwd><kwd>database integration</kwd><kwd>integrate multiple data sets</kwd><kwd>Study Data Tabulation Model</kwd><kwd>SDTM</kwd><kwd>Clinical Data Interchange Standards Consortium</kwd><kwd>CDISC</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>To use medical databases efficiently in clinical research, methods that efficiently integrate multiple databases must be established. The International Committee of Medical Journal Editors (ICMJE) requires researchers to include a data sharing statement when submitting a manuscript [<xref ref-type="bibr" rid="ref1">1</xref>]. Moreover, there is a growing focus on the sharing of clinical research data and its uses. However, the current ICMJE statement makes no mention of specific data standards for data sharing. Therefore, a discussion regarding specific ways to share data collected in clinical research is needed.</p><p>Recently, several medical societies and research groups have formed registries and conducted large cohort studies. The integration of databases with the same disease focus enables the analysis of data for many end points and patients. The reanalysis of data from large cohorts, such as in a pooled analysis, has greater statistical power and yields more reliable results [<xref ref-type="bibr" rid="ref2">2</xref>]. 
For example, the Premenopausal Breast Cancer Collaboration, supported by the National Cancer Institute in the United States, published the results of several studies that used pooled analysis methods to integrate data from 20 independent cohort studies [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>The Clinical Data Interchange Standards Consortium (CDISC) is a nonprofit, global organization that has developed several data standards to streamline clinical research [<xref ref-type="bibr" rid="ref4">4</xref>]. The Study Data Tabulation Model (SDTM) is a data standard model for the sharing and integration of research data, which was initially developed to standardize the tabulation of clinical trial data submitted to the Food and Drug Administration (FDA) [<xref ref-type="bibr" rid="ref5">5</xref>]. The concept of the CDISC SDTM is shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>. The CDISC SDTM consists of several domains derived from clinical aspects, and each domain is identified by a unique 2-letter code [<xref ref-type="bibr" rid="ref6">6</xref>]. Metadata are described in the data definition document named &#x201C;Define&#x201D; that is submitted with the data to regulatory authorities [<xref ref-type="bibr" rid="ref7">7</xref>]. Applying the SDTM and Define.xml to the data items collected in different databases makes it easy to unify variable names and codes. Clinical research data warehouses using the CDISC SDTM are considered useful for data sharing in academic research.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Clinical Data Interchange Standards Consortium (CDISC) Study Data Tabulation Model (SDTM) concepts. eCRF: electronic case report form.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e46725_fig01.png"/></fig><p>The integration of multiple data sets is difficult even among studies focused on the same disease. 
Major hurdles of data integration include the lack of standardization for the data format, variable names, and variable codes. Due to these problems, the manual conversion of data involves a large workload, which is likely to incur human error. Because the standardization of variable names and codes makes it easy to build a statistical data set, the CDISC SDTM provides a unique solution for database integration. However, many cohort studies have been conducted using a paper case report form (CRF) and formatted into data sets as a comma-separated values file or a spreadsheet file. It is difficult to convert these legacy data sets into the CDISC SDTM format because the variables need to refer to the CDISC variables and controlled terminology (CT).</p><p>Research Electronic Data Capture (REDCap) is an electronic data capture system developed by Vanderbilt University [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. The &#x201C;field annotation&#x201D; function, introduced in REDCap version 6.5, can store meta-information for various standards related to clinical research, such as the CDISC, Systematized Nomenclature of Medicine (SNOMED), and Logical Observation Identifiers Names and Codes (LOINC). We previously developed &#x201C;REDCap2SDTM,&#x201D; a tool for parsing SDTM meta-information in the &#x201C;field annotation&#x201D; function and generating an XML file (Define-XML v2.0) with SDTM data [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. This tool enables the efficient generation of SDTM data from multiple preexisting research data sets, and it has been validated for SDTM data generation based on hypothetical clinical trial data. 
However, only a few data integration projects using an actual research data set were carried out [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>].</p><p>The purpose of this project was to design a practical working model for integrating preexisting databases using the CDISC SDTM. Here, we report the pragmatic conversion of multiple preexisting databases based on the CDISC SDTM format.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>Since this study was conducted on the structure of the database and not on patients, this study is outside the scope of ethical guidelines.</p></sec><sec id="s2-2"><title>Project Structure</title><p>This project required multiple skill sets. A board-certified nephrologist (Japanese Society of Nephrology) with expertise in immunoglobulin A (IgA) nephropathy (including patient characteristics, laboratory data, and disease-specific items) confirmed the data structure in detail and constructed the independent database in REDCap. In parallel, a clinical data manager with CDISC SDTM expertise set the SDTM metadata in each variable. We outsourced the modification of REDCap2SDTM to a contract research organization to improve the efficiency of the SDTM data generation. The diagram of the study structure is shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Flowchart of the data integration. CT: controlled terminology; ODM: Operational Data Model; REDCap: Research Electronic Data Capture; SDTM: Study Data Tabulation Model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e46725_fig02.png"/></fig></sec><sec id="s2-3"><title>Data Source</title><p>IgA nephropathy is the most common type of chronic glomerulonephritis in Japan. 
IgA nephropathy is a refractory disease in which 30% to 40% of patients reach end-stage renal failure after approximately 20 years [<xref ref-type="bibr" rid="ref16">16</xref>]. Various clinical features and a chronic course are the hallmarks of this disease; therefore, a database that collects multiple items and a prognosis is needed. To date, the IgA Nephropathy Working Group in Progressive Renal Diseases Research, affiliated with Research on Intractable Diseases from the Ministry of Health, Labor and Welfare of Japan, has conducted 3 cohort studies with over 1000 participants in each cohort. However, the collected items and data structures in each cohort study were not standardized, making the construction of an integrated database difficult. The number of collected items and the data structure of each cohort are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Characteristics of each cohort study.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Cohort</td><td align="left" valign="bottom">Items in the data set, n</td><td align="left" valign="bottom">Sites, n</td><td align="left" valign="bottom">Data structure</td></tr></thead><tbody><tr><td align="left" valign="top">A</td><td align="left" valign="top">57</td><td align="char" char="." valign="top">6</td><td align="left" valign="top">Vertical format: with repeated measurement data</td></tr><tr><td align="left" valign="top">B</td><td align="left" valign="top">65</td><td align="char" char="." valign="top">6</td><td align="left" valign="top">Horizontal format: no repeated measurement data</td></tr><tr><td align="left" valign="top">C</td><td align="left" valign="top">582</td><td align="char" char="." 
valign="top">42</td><td align="left" valign="top">Horizontal format: no repeated measurement data</td></tr></tbody></table></table-wrap></sec><sec id="s2-4"><title>Outline of Multiple Database Integration Work</title><p>The integration of multiple preexisting databases comprised the following three phases: (1) the confirmation of the variables in detail, (2) SDTM mapping, and (3) SDTM data generation and integration. The details of each phase are given below.</p><sec id="s2-4-1"><title>Phase 1: Confirmation of the Variables in Detail</title><p>In most cases, variable names differ by study, and the types of data also vary (date, digits, categorical variable, etc). Therefore, we set common values between each database in this phase.</p><p>Preexisting research data are stored in various formats between studies, including spreadsheets with a horizontal data (denormalized) structure. Since many SDTM domains are defined by a vertical data (normalized) structure, the data structure was transformed.</p><p>The main tasks of this phase were as follows:</p><list list-type="bullet"><list-item><p>Standardize the variables in detail: code categorical data and nominal variables, unify date types (eg, YYYY/MM/DD), and improve the data format and the number of digits in clinical laboratory data in each data set</p></list-item><list-item><p>Manage the data structure: transform repeated data from a horizontal structure to a vertical structure</p></list-item><list-item><p>Validate the definition of variables: clarify data definitions and create a definition document in cooperation with specialists</p></list-item></list></sec><sec id="s2-4-2"><title>Phase 2: SDTM Mapping</title><p>The CDISC has CT [<xref ref-type="bibr" rid="ref17">17</xref>], and the terms used for each variable are specified in the <italic>SDTM Implementation Guide</italic> [<xref ref-type="bibr" rid="ref6">6</xref>]. 
Through the use of CT, variables that were arbitrarily coded in different data sets can be derived as the same code. For example, if 1 data set coded male individuals as 1 and female individuals as 2 and another data set coded male individuals as 0 and female individuals as 1, the CT would code male individuals as &#x201C;M&#x201D; and female individuals as &#x201C;F.&#x201D; Therefore, the SDTM format data sets derived &#x201C;M&#x201D; for male individuals and &#x201C;F&#x201D; for female individuals. However, not all codes have specified CT, and coding lists for variables that are not specified must be created.</p><p>The domain model of the SDTM has a fixed domain of evaluation items to be stored. Therefore, each item in the data set must be mapped to the appropriate domain. For example, the &#x201C;DM&#x201D; domain contains the background of the patients (demographics), which includes age, sex, and race. The variable names were specified in each domain of the SDTM, for example, &#x201C;SEX&#x201D; for sex and &#x201C;LBORRES&#x201D; for laboratory results. Items with a unique code, such as sex, do not require a test code; the metadata are defined by the domain name &#x201C;DM,&#x201D; and the variable is named &#x201C;SEX.&#x201D; For items with various kinds of values, such as serum creatinine, a test code needs to be specified. For example, the meta-information of the creatinine test value must be defined by the domain name &#x201C;LB,&#x201D; the variable name &#x201C;LBORRES,&#x201D; and the test code &#x201C;CREAT.&#x201D; In addition, disease-specific end points are not defined in the standard domain of the SDTM. 
The SDTM does not allow new variables to be added arbitrarily; therefore, new variables must be defined in conjunction with the parent record using &#x201C;supplemental qualifiers.&#x201D; We determined the SDTM test code based on the appropriate code list from the SDTM CT.</p><p>Generally, in clinical studies, nominal scales (eg, male and female) are replaced by codes in the analysis. The method of assigning the code differs depending on the research and the data set, and recoding is necessary during database integration. We set both the domain and the meta-information of each data set based on the definitions confirmed in phase 1.</p><p>The main tasks of this phase were as follows:</p><list list-type="bullet"><list-item><p>Recoding: map nominal variables and codes according to the CT or custom coding lists</p></list-item><list-item><p>SDTM metadata mapping: map existing data variables to the SDTM domains</p></list-item></list></sec><sec id="s2-4-3"><title>Phase 3: Generate SDTM Data in the Operational Data Model Format</title><p>In this phase, the SDTM metadata were manually set in the &#x201C;field annotation&#x201D; function (<xref ref-type="fig" rid="figure3">Figure 3</xref>). Subsequently, the data, including the data dictionary with the SDTM metadata, were downloaded in the Operational Data Model (ODM) format with SDTM metadata. Finally, REDCap2SDTM automatically generated each data set in the ODM format with the SDTM metadata.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Screenshot of &#x201C;field annotation.&#x201D; SDTM: Study Data Tabulation Model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e46725_fig03.png"/></fig><p>Common items in all data sets must be assigned the same metadata. 
Therefore, it is necessary to identify common items in all data sets to confirm the consistency of the metadata.</p><p>The main tasks of this phase were as follows:</p><list list-type="bullet"><list-item><p>Check the consistency of the metadata: unify common items between each data set</p></list-item><list-item><p>Generate SDTM metadata from each database: download and record data and data dictionaries and generate SDTM metadata with REDCap2SDTM</p></list-item><list-item><p>Output SDTM metadata in the ODM format: retrieve the SDTM metadata output from REDCap2SDTM</p></list-item></list><p>A summary of the data integration process is shown in <xref ref-type="fig" rid="figure4">Figure 4</xref>.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Diagram of the study structure. CDISC: Clinical Data Interchange Standards Consortium; ODM: Operational Data Model; REDCap: Research Electronic Data Capture; SDTM: Study Data Tabulation Model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e46725_fig04.png"/></fig></sec></sec><sec id="s2-5"><title>REDCap2SDTM Version 2</title><p>REDCap2SDTM combined formatted ODM data that were embedded with 3 pieces of metadata (ie, SDTM domain name, variable name, and test code) into the field annotation of REDCap as the metadata corresponding to the variable name of the data set, to convert the database into the SDTM format. This tool dynamically generates SDTM data and a Define.xml file by parsing. The syntax of the meta-information is the CDISC Define-XML version 2.0 &#x201C;ItemDef element.&#x201D; REDCap2SDTM version 2 parses the object identifier attribute value and uses that information for mapping (eg, &#x201C;IT.VS.VSORRES.SYSBP&#x201D; and &#x201C;IT.AE.AETERM&#x201D;)
[<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>The CDISC ODM is a vendor-neutral, platform-independent data format for exchanging and storing clinical research data and metadata that can be shared between different software systems [<xref ref-type="bibr" rid="ref18">18</xref>]. In this case, we modified REDCap2SDTM to adopt the CDISC ODM format (REDCap2SDTM version 2; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Due to this modification, REDCap2SDTM version 2 could convert the SDTM data to the ODM format and could expand to handle variables across multiple domains.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>This project was conducted from July 2018 to January 2019. Items were selected for integration in the SDTM metadata based on the opinions of the board-certified nephrologist.</p><p>Regarding disease-specific items, the histological classification of the disease was defined in the &#x201C;SUPPMH&#x201D; domain, items related to family history were defined in the &#x201C;SUPPDM&#x201D; domain, and the number of steroid pulse therapies was defined in the &#x201C;SUPPCM&#x201D; domain. The following domains were generated in this study: &#x201C;DM&#x201D; (demographics), &#x201C;CM&#x201D; (concomitant medications), &#x201C;LB&#x201D; (laboratory test results), &#x201C;VS&#x201D; (vital signs), &#x201C;SUPPCM,&#x201D; &#x201C;SUPPDM,&#x201D; and &#x201C;SUPPMH.&#x201D;</p><p>The preexisting databases included 57 total items for cohort A, 65 total items for cohort B, and 582 total items for cohort C. The metadata were set for 40 items for cohort A, 18 items for cohort B, and 102 items for cohort C. We found 17 common items. Finally, a total of 119 items were set for the SDTM metadata. Of these, 56 items used the nominal scale; of these 56 items, 48 could be recoded using CT, and 8 required independently created code lists. 
Disease-specific items, such as the pathological classification based on the clinical guidelines for IgA nephropathy in Japan [<xref ref-type="bibr" rid="ref19">19</xref>] and the Oxford classification [<xref ref-type="bibr" rid="ref20">20</xref>], required their own code list. <xref ref-type="table" rid="table2">Table 2</xref> lists the SDTM metadata of key items.</p><p>The data dictionary and ODM data were outputted from REDCap, and REDCap2SDTM version 2 was used to output the data in the SDTM format. The items defined by individual names in each database were collated based on the metadata by the CDISC SDTM.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>SDTM<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> metadata of the key items.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Items</td><td align="left" valign="bottom">Cohort A</td><td align="left" valign="bottom">Cohort B</td><td align="left" valign="bottom">Cohort C</td><td align="left" valign="bottom">SDTM metadata</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Sex</td><td align="left" valign="top">Sex</td><td align="left" valign="top">Sex</td><td align="left" valign="top">Sex</td><td align="left" valign="top">DM.SEX</td></tr><tr><td align="left" valign="top" colspan="2">Birthday</td><td align="left" valign="top">Birth_date(Y/M/D)</td><td align="left" valign="top">Birth_date(Y/M/D)</td><td align="left" valign="top">birth date</td><td align="left" valign="top">DM.BRTHDTC</td></tr><tr><td align="left" valign="top" colspan="2">Age</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Age</td><td align="left" valign="top">DM.AGE</td></tr><tr><td align="left" valign="top" colspan="6"><bold>Vital sign</bold></td></tr><tr><td align="left" valign="top"/><td 
align="left" valign="top">Systolic blood pressure</td><td align="left" valign="top">sbp_bx</td><td align="left" valign="top">sbp_bx</td><td align="left" valign="top">Sbp</td><td align="left" valign="top">VS.VSORRES.SYSBP</td></tr><tr><td align="left" valign="top" colspan="6"><bold>Concomitant drugs</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Renin-angiotensin system inhibitor</td><td align="left" valign="top">rasb_prior</td><td align="left" valign="top">rasb_prior</td><td align="left" valign="top">Ras</td><td align="left" valign="top">CM.CMOCCUR.RAS</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Date of first immunosuppressants</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Day</td><td align="left" valign="top">CM.CMSTDTC.PSL</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Prednisolone (yes or no)</td><td align="left" valign="top">IS_bx</td><td align="left" valign="top">fuSteroids_bx</td><td align="left" valign="top">ral steroid p or a</td><td align="left" valign="top">CM.CMOCCUR.PSL</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Immunosuppressants without prednisolone (yes or no)</td><td align="left" valign="top">Non_steroid_IS</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">immuno therapy</td><td align="left" valign="top">CM.CMOCCUR.PSLOTH</td></tr><tr><td align="left" valign="top" colspan="6"><bold>Tonsillectomy</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Tonsillectomy (yes or no)</td><td align="left" valign="top">tonsillectomy</td><td align="left" valign="top">fu_tonsillectomy</td><td align="left" valign="top">Tonsil</td><td align="left" valign="top">SUPPMH.QNAM.OPE</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Date of tonsillectomy</td><td align="left" 
valign="top">tonsillectomy_dt</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">tonsil date</td><td align="left" valign="top">SUPPMH.QNAM.OPEDATE</td></tr><tr><td align="left" valign="top" colspan="6"><bold>Laboratory examinations</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Date of kidney biopsy</td><td align="left" valign="top">date_bx</td><td align="left" valign="top">date_bx</td><td align="left" valign="top">kidney_biopsy_date</td><td align="left" valign="top">LB.LBDTC.BIOPSY</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Serum creatinine</td><td align="left" valign="top">Creatinine</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Cr</td><td align="left" valign="top">LB.LBORRES.CREAT</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">eGFR<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">eGFR</td><td align="left" valign="top">gfr_bx_provided</td><td align="left" valign="top">Egfr</td><td align="left" valign="top">LB.LBORRES.EGFR</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Urinary protein (spot)</td><td align="left" valign="top">uprot_bx</td><td align="left" valign="top">uprot</td><td align="left" valign="top">urinprotein1</td><td align="left" valign="top">LB.LBORRES.PROT1</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Urinary protein (24 h)</td><td align="left" valign="top">uprot_24h_bx_provided</td><td align="left" valign="top">uprot_24h</td><td align="left" valign="top">Urinprotein</td><td align="left" valign="top">LB.LBORRES.PROT24</td></tr><tr><td align="left" valign="top" colspan="6"><bold>Pathological findings</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Oxford classification: mesangial hypercellularity (M)</td><td align="left" valign="top">m</td><td align="left" 
valign="top">m</td><td align="left" valign="top">Oxford1</td><td align="left" valign="top">SUPPMH.QNAM.M</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Oxford classification: endocapillary hypercellularity (E)</td><td align="left" valign="top">e</td><td align="left" valign="top">e</td><td align="left" valign="top">Oxford2</td><td align="left" valign="top">SUPPMH.QNAM.E</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Oxford classification: segmental glomerulosclerosis (S)</td><td align="left" valign="top">s</td><td align="left" valign="top">s</td><td align="left" valign="top">Oxford3</td><td align="left" valign="top">SUPPMH.QNAM.S</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Oxford classification: tubular atrophy/interstitial fibrosis (T)</td><td align="left" valign="top">t</td><td align="left" valign="top">t</td><td align="left" valign="top">Oxford4</td><td align="left" valign="top">SUPPMH.QNAM.T</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>SDTM: Study Data Tabulation Model.</p></fn><fn id="table2fn2"><p><sup>b</sup>Not available.</p></fn><fn id="table2fn3"><p><sup>c</sup>eGFR: estimated glomerular filtration rate.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Strength of This Study</title><p>Integrating multiple preexisting databases through collaboration between the disease specialist and clinical data manager enabled the use of legacy data. Our project suggested that properly defining CDISC SDTM metadata allowed for the integration of multiple preexisting databases. In this paper, we focused on the technical aspect. 
Although the utility of this concept has been verified with hypothetical data, few reports describe in detail the technical aspects of generating SDTM data from actual clinical databases.</p></sec><sec id="s4-2"><title>The CDISC SDTM</title><p>The definition of metadata using the CDISC SDTM is important. The CDISC is a nonprofit, global organization that consists of pharmaceutical companies, contract research organizations, academic research organizations, and IT vendors. Pharmaceutical companies and contract research organizations account for 34% of the entities within the CDISC, whereas academic research organizations account for only 7% [<xref ref-type="bibr" rid="ref21">21</xref>]. This imbalance may have arisen because those submitting a regulatory application to the FDA or the Pharmaceuticals and Medical Devices Agency are required to comply with CDISC standards [<xref ref-type="bibr" rid="ref22">22</xref>]. Therefore, there is a strong awareness of the CDISC as a tool for regulatory submissions, but few researchers are aware that the CDISC SDTM concept can be used to standardize data.</p><p>The mission of the CDISC is to develop and support global, platform-independent data standards that enable information system interoperability to improve medical research and related areas of health care. Following this statement, we have succeeded in integrating 3 databases by incorporating the CDISC SDTM concept into the standardization of multiple databases. Since this database complies with the standardization of the CDISC SDTM, this integrated database can be compared to other clinical trials or it can be used as a historical control. Our study shows that the CDISC SDTM is a necessary tool not only for applying for the approval of regulatory submissions but also for data standardization and integration. 
In recent years, the CDISC has partnered with REDCap to make Clinical Data Acquisition Standards Harmonization eCRF metadata available in the REDCap Shared Instrument Library [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. It is expected that the researchers will be able to import CDISC SDTM metadata directly into their REDCap projects for immediate use in clinical trial data collection. In the future, CDISC SDTM data will be generated more easily.</p><p>We were able to develop the methodology for integrating multiple preexisting databases in just 6 months. This timely integration was due to the collaboration of specialists in the disease area and a data manager familiar with the CDISC SDTM, allowing each phase to proceed simultaneously and resulting in a fast integration time. Inconsistencies in the coding method of the nominal scale hindered the integration of multiple databases. However, in this study, codes defined individually for each database were automatically recoded to substantially reduce the required work hours, which also contributed to the fast integration time. When coding terms not defined by CT, such as terms that are specific to the disease area, a code list should be created following thorough discussions with specialists, referring to therapeutic area standards [<xref ref-type="bibr" rid="ref24">24</xref>]. It is important to improve work efficiency by making the best use of existing materials. Although the preexisting databases were integrated in this study, even in cases where the data were updated longitudinally, it is possible to integrate data with the SDTM, provided that the meta-information for the evaluated item is defined.</p><p>Currently, there are several medical standards. 
Observational Medical Outcomes Partnership (OMOP), which is managed by Observational Health Data Sciences and Informatics [<xref ref-type="bibr" rid="ref25">25</xref>], aims to standardize observational databases, such as electronic medical records and claims data, to make them interoperable. HL7 Fast Healthcare Interoperability Resources (FHIR) [<xref ref-type="bibr" rid="ref26">26</xref>] is the standard for medical information exchange. In this study, we used the CDISC SDTM because, at the time, it was the most widely used standard with many accumulated findings. We plan to expand this project to support the OMOP Common Data Model and FHIR in the future.</p></sec><sec id="s4-3"><title>Issues for Integration</title><p>We observed the following points when integrating the preexisting clinical databases: (1) the variability of the collected items and (2) the complexity of the test code. The items in the preexisting cohort studies used in this project were not standardized and were not defined in detail; therefore, we clarified the meaning of the variables based on expert opinions. Clarifying data definitions is difficult for data managers who lack the requisite background knowledge.</p><p>In addition, we were faced with large differences in the number of items collected from each preexisting cohort study. As previously mentioned, 57 and 65 items were collected in cohorts A and B, respectively, far fewer than the 582 items collected in cohort C, which included data related to concomitant medications. However, because information on concomitant medications is often missing, it is considered a difficult item to use for analysis. Generally, information on concomitant medications is not used for analysis and is not collected in precise clinical trials. To avoid complications in the integration process, information collected on concomitant medications should focus on those related to the disease area or should be divided into categories prior to collection. 
These findings were obtained by scrutinizing the differences in the items collected in each database prior to generating the metadata.</p><p>The complexity of the test code was clarified during the generation of the metadata. As described above, the amount of urinary protein was defined as both &#x201C;PROT&#x201D; and &#x201C;PROT24.&#x201D; Because the details of proteinuria are not defined in CT, there is a risk of defining inappropriate metadata. These findings suggest that the generation of metadata requires a deep understanding of the disease in addition to the concepts of the CDISC SDTM. In this study, the clinical data manager who had knowledge of the CDISC SDTM was responsible for generating the metadata in collaboration with a specialist in the disease area. Currently, clinical data managers primarily play an active role in prospective clinical trials. Thus, the main responsibilities of the clinical data manager are planning the clinical trial, assisting with the creation of the protocol and CRF, cleaning the data, confirming data consistency, and managing data quality in clinical trials. We believe that the clinical data manager will play an important role in data integration projects in the near future. Collaborations between the clinical data manager and the disease specialist will likely become even more important.</p></sec><sec id="s4-4"><title>Limitations</title><p>This project had several limitations. First, the data of the cohort studies did not cover all domains of this disease. In the future, we would like to increase the number of integration examples and generalize the program to cover all domains. Second, a great deal of time was spent manually setting the metadata. In the future, it may be beneficial to automatically refer to the shared metadata from the CDISC Library or to develop a tool that allows artificial intelligence to suggest the metadata using therapeutic area standards. Third, REDCap2SDTM version 2 required input in the ODM format. 
Several programs that generate ODM or Define-XML data from a spreadsheet are available from the CDISC Open Source Alliance [<xref ref-type="bibr" rid="ref27">27</xref>]. We will consider embedding these programs into REDCap2SDTM version 2 in the future.</p></sec><sec id="s4-5"><title>Conclusion</title><p>Our results suggest that the CDISC SDTM is useful for integrating multiple preexisting databases with variable names and codes. We hope that this research will contribute to the use of legacy data sets.</p></sec></sec></body><back><ack><p>This work was supported by Grant-in-Aid for Early-Career Scientists (Japan Society for the Promotion of Science [JSPS]) 18K17380; Grant-in-Aid for Scientific Research (C) (JSPS) 19K12867 and 21K10445; Japan Agency for Medical Research and Development (AMED) under grant JP20lk0201061; and Grant-in-Aid for Intractable Renal Diseases Research, Research on Rare and Intractable Diseases, Health and Labour Sciences Research Grants from the Ministry of Health, Labour and Welfare of Japan (grant 20FC1045).</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CDISC</term><def><p>Clinical Data Interchange Standards Consortium</p></def></def-item><def-item><term id="abb2">CRF</term><def><p>case report form</p></def></def-item><def-item><term id="abb3">CT</term><def><p>controlled terminology</p></def></def-item><def-item><term id="abb4">FDA</term><def><p>Food and Drug Administration</p></def></def-item><def-item><term id="abb5">FHIR</term><def><p>Fast Healthcare Interoperability Resources</p></def></def-item><def-item><term id="abb6">ICMJE</term><def><p>International Committee of Medical Journal Editors</p></def></def-item><def-item><term id="abb7">IgA</term><def><p>immunoglobulin A</p></def></def-item><def-item><term id="abb8">LOINC</term><def><p>Logical Observation Identifiers Names and Codes</p></def></def-item><def-item><term 
id="abb9">ODM</term><def><p>Operational Data Model</p></def></def-item><def-item><term id="abb10">OMOP</term><def><p>Observational Medical Outcomes Partnership</p></def></def-item><def-item><term id="abb11">REDCap</term><def><p>Research Electronic Data Capture</p></def></def-item><def-item><term id="abb12">SDTM</term><def><p>Study Data Tabulation Model</p></def></def-item><def-item><term id="abb13">SNOMED</term><def><p>Systematized Nomenclature of Medicine</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Taichman</surname><given-names>DB</given-names></name><name name-style="western"><surname>Sahni</surname><given-names>P</given-names></name><name name-style="western"><surname>Pinborg</surname><given-names>A</given-names></name><etal/></person-group><article-title>Data sharing statements for clinical trials: a requirement of the International Committee of Medical Journal Editors</article-title><source>PLoS Med</source><year>2017</year><month>06</month><day>5</day><volume>14</volume><issue>6</issue><fpage>e1002315</fpage><pub-id pub-id-type="doi">10.1371/journal.pmed.1002315</pub-id><pub-id pub-id-type="medline">28582414</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blettner</surname><given-names>M</given-names></name><name name-style="western"><surname>Sauerbrei</surname><given-names>W</given-names></name><name name-style="western"><surname>Schlehofer</surname><given-names>B</given-names></name><name name-style="western"><surname>Scheuchenpflug</surname><given-names>T</given-names></name><name name-style="western"><surname>Friedenreich</surname><given-names>C</given-names></name></person-group><article-title>Traditional reviews, meta-analyses and pooled analyses in 
epidemiology</article-title><source>Int J Epidemiol</source><year>1999</year><month>02</month><volume>28</volume><issue>1</issue><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1093/ije/28.1.1</pub-id><pub-id pub-id-type="medline">10195657</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nichols</surname><given-names>HB</given-names></name><name name-style="western"><surname>Schoemaker</surname><given-names>MJ</given-names></name><name name-style="western"><surname>Cai</surname><given-names>J</given-names></name><etal/></person-group><article-title>Breast cancer risk after recent childbirth: a pooled analysis of 15 prospective studies</article-title><source>Ann Intern Med</source><year>2019</year><month>01</month><day>1</day><volume>170</volume><issue>1</issue><fpage>22</fpage><lpage>30</lpage><pub-id pub-id-type="doi">10.7326/M18-1323</pub-id><pub-id pub-id-type="medline">30534999</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><article-title>CDISC</article-title><source>Clinical Data Interchange Standards Consortium</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdisc.org/">www.cdisc.org/</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><article-title>SDTM</article-title><source>Clinical Data Interchange Standards Consortium</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdisc.org/standards/foundational/sdtm">www.cdisc.org/standards/foundational/sdtm</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>CDISC Submission Data Standards Team</collab></person-group><source>CDISC Study Data Tabulation Model 
Implementation Guide: Human Clinical Trials Version 3.3</source><year>2018</year><publisher-name>Clinical Data Interchange Standards Consortium</publisher-name></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><article-title>Define-XML</article-title><source>Clinical Data Interchange Standards Consortium</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdisc.org/standards/data-exchange/define-xml">www.cdisc.org/standards/data-exchange/define-xml</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harris</surname><given-names>PA</given-names></name><name name-style="western"><surname>Taylor</surname><given-names>R</given-names></name><name name-style="western"><surname>Thielke</surname><given-names>R</given-names></name><name name-style="western"><surname>Payne</surname><given-names>J</given-names></name><name name-style="western"><surname>Gonzalez</surname><given-names>N</given-names></name><name name-style="western"><surname>Conde</surname><given-names>JG</given-names></name></person-group><article-title>Research Electronic Data Capture (REDCap)--a metadata-driven methodology and workflow process for providing translational research informatics support</article-title><source>J Biomed Inform</source><year>2009</year><month>04</month><volume>42</volume><issue>2</issue><fpage>377</fpage><lpage>381</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2008.08.010</pub-id><pub-id pub-id-type="medline">18929686</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Franklin</surname><given-names>JD</given-names></name><name name-style="western"><surname>Guidry</surname><given-names>A</given-names></name><name 
name-style="western"><surname>Brinkley</surname><given-names>JF</given-names></name></person-group><article-title>A partnership approach for electronic data capture in small-scale clinical trials</article-title><source>J Biomed Inform</source><year>2011</year><month>12</month><volume>44</volume><issue>Suppl 1</issue><fpage>S103</fpage><lpage>S108</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2011.05.008</pub-id><pub-id pub-id-type="medline">21651992</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>REDCap</article-title><source>Research Electronic Data Capture</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.project-redcap.org/">www.project-redcap.org/</ext-link></comment></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yamamoto</surname><given-names>K</given-names></name><name name-style="western"><surname>Ota</surname><given-names>K</given-names></name><name name-style="western"><surname>Akiya</surname><given-names>I</given-names></name><name name-style="western"><surname>Shintani</surname><given-names>A</given-names></name></person-group><article-title>A pragmatic method for transforming clinical research data from the Research Electronic Data Capture &#x201C;REDCap&#x201D; to Clinical Data Interchange Standards Consortium (CDISC) Study Data Tabulation Model (SDTM): development and evaluation of REDCap2SDTM</article-title><source>J Biomed Inform</source><year>2017</year><month>06</month><volume>70</volume><fpage>65</fpage><lpage>76</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2017.05.003</pub-id><pub-id pub-id-type="medline">28487263</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Yamamoto</surname><given-names>K</given-names></name></person-group><article-title>Introduction to Research Electronic Data Capture (REDCap) and REDCap2SDTM, a conversion tool to facilitate clinical research data sharing. Article in Japanese</article-title><source>Brain Nerve</source><year>2017</year><month>07</month><volume>69</volume><issue>7</issue><fpage>848</fpage><lpage>855</lpage><pub-id pub-id-type="doi">10.11477/mf.1416200830</pub-id><pub-id pub-id-type="medline">28740000</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oda</surname><given-names>T</given-names></name><name name-style="western"><surname>Chiu</surname><given-names>SW</given-names></name><name name-style="western"><surname>Yamaguchi</surname><given-names>T</given-names></name></person-group><article-title>Semi-automated conversion of clinical trial legacy data into CDISC SDTM standards format using supervised machine learning</article-title><source>Methods Inf Med</source><year>2021</year><month>05</month><volume>60</volume><issue>1-02</issue><fpage>49</fpage><lpage>61</lpage><pub-id pub-id-type="doi">10.1055/s-0041-1731388</pub-id><pub-id pub-id-type="medline">34237784</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>Cholesterol Treatment Trialists&#x2019; Collaboration</collab></person-group><article-title>Harmonisation of large-scale, heterogeneous individual participant adverse event data from randomised trials of statin therapy</article-title><source>Clin Trials</source><year>2022</year><month>12</month><volume>19</volume><issue>6</issue><fpage>593</fpage><lpage>604</lpage><pub-id pub-id-type="doi">10.1177/17407745221105509</pub-id><pub-id pub-id-type="medline">35815805</pub-id></nlm-citation></ref><ref 
id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Takahara</surname><given-names>S</given-names></name><name name-style="western"><surname>Saito</surname><given-names>TI</given-names></name><name name-style="western"><surname>Imai</surname><given-names>Y</given-names></name><name name-style="western"><surname>Kawakami</surname><given-names>T</given-names></name><name name-style="western"><surname>Murayama</surname><given-names>T</given-names></name></person-group><article-title>A use-case analysis of Clinical Data Interchange Standards Consortium/Study Data Tabulation Model in academia in an investigator-initiated clinical trial</article-title><source>Nagoya J Med Sci</source><year>2022</year><month>02</month><volume>84</volume><issue>1</issue><fpage>120</fpage><lpage>132</lpage><pub-id pub-id-type="doi">10.18999/nagjms.84.1.120</pub-id><pub-id pub-id-type="medline">35392016</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Koyama</surname><given-names>A</given-names></name><name name-style="western"><surname>Igarashi</surname><given-names>M</given-names></name><name name-style="western"><surname>Kobayashi</surname><given-names>M</given-names></name><collab>Research Group on Progressive Renal Diseases</collab></person-group><article-title>Natural history and risk factors for immunoglobulin A nephropathy in Japan</article-title><source>Am J Kidney Dis</source><year>1997</year><month>04</month><volume>29</volume><issue>4</issue><fpage>526</fpage><lpage>532</lpage><pub-id pub-id-type="doi">10.1016/S0272-6386(97)90333-4</pub-id><pub-id pub-id-type="medline">9100040</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><article-title>Controlled terminology</article-title><source>Clinical Data Interchange 
Standards Consortium</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdisc.org/standards/terminology/controlled-terminology">www.cdisc.org/standards/terminology/controlled-terminology</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><article-title>ODM</article-title><source>Clinical Data Interchange Standards Consortium</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdisc.org/standards/data-exchange/odm">www.cdisc.org/standards/data-exchange/odm</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tomino</surname><given-names>Y</given-names></name><name name-style="western"><surname>Sakai</surname><given-names>H</given-names></name><collab>Special Study Group (IgA Nephropathy) on Progressive Glomerular Disease</collab></person-group><article-title>Clinical guidelines for immunoglobulin A (IgA) nephropathy in Japan, second version</article-title><source>Clin Exp Nephrol</source><year>2003</year><month>06</month><volume>7</volume><issue>2</issue><fpage>93</fpage><lpage>97</lpage><pub-id pub-id-type="doi">10.1007/s10157-003-0232-4</pub-id><pub-id pub-id-type="medline">14586726</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>Working Group of the International IgA Nephropathy Network and the Renal Pathology Society</collab><name name-style="western"><surname>Roberts</surname><given-names>ISD</given-names></name><name name-style="western"><surname>Cook</surname><given-names>HT</given-names></name><etal/></person-group><article-title>The Oxford classification of IgA nephropathy: pathology definitions, correlations, and 
reproducibility</article-title><source>Kidney Int</source><year>2009</year><month>09</month><volume>76</volume><issue>5</issue><fpage>546</fpage><lpage>556</lpage><pub-id pub-id-type="doi">10.1038/ki.2009.168</pub-id><pub-id pub-id-type="medline">19571790</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="web"><article-title>Membership</article-title><source>Clinical Data Interchange Standards Consortium</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdisc.org/membership">www.cdisc.org/membership</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><article-title>Providing regulatory submissions in electronic format -- standardized study data</article-title><source>US Food &#x0026; Drug Administration</source><year>2021</year><month>06</month><access-date>2023-11-09</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/regulatory-information/search-fda-guidance-documents/providing-regulatory-submissions-electronic-format-standardized-study-data">www.fda.gov/regulatory-information/search-fda-guidance-documents/providing-regulatory-submissions-electronic-format-standardized-study-data</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><article-title>eCRF portal</article-title><source>Clinical Data Interchange Standards Consortium</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdisc.org/kb/ecrf">www.cdisc.org/kb/ecrf</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><article-title>Therapeutic areas</article-title><source>Clinical Data Interchange Standards Consortium</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.cdisc.org/standards/therapeutic-areas">www.cdisc.org/standards/therapeutic-areas</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>OHDSI</article-title><source>Observational Health Data Sciences and Informatics</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ohdsi.org/">www.ohdsi.org/</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Enabling health interoperability through FHIR</article-title><source>HL7 FHIR Foundation</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://fhir.org/">https://fhir.org/</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>COSA repository directory</article-title><source>CDISC Open Source Alliance</source><access-date>2023-08-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cosa.cdisc.org/">https://cosa.cdisc.org/</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>The package of REDCap2SDTM version 2.</p><media xlink:href="medinform_v11i1e46725_app1.zip" xlink:title="ZIP File, 42 KB"/></supplementary-material></app-group></back></article>