<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">49646</article-id><article-id pub-id-type="doi">10.2196/49646</article-id><title-group><article-title>A Scalable Pseudonymization Tool for Rapid Deployment in Large Biomedical Research Networks: Development and Evaluation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Abu Attieh</surname><given-names>Hammam</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Neves</surname><given-names>Diogo Telmo</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Guedes</surname><given-names>Mariana</given-names></name><degrees>MSc, MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Mirandola</surname><given-names>Massimo</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Dellacasa</surname><given-names>Chiara</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Rossi</surname><given-names>Elisa</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Prasser</surname><given-names>Fabian</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Medical Informatics Group, Berlin Institute of Health at Charit&#x00E9; &#x2013; Universit&#x00E4;tsmedizin Berlin</institution>, <addr-line>Berlin</addr-line>, <country>Germany</country></aff><aff id="aff2"><institution>Infection and Antimicrobial Resistance Control and Prevention Unit, Centro Hospitalar Universit&#x00E1;rio S&#x00E3;o Jo&#x00E3;o</institution>, <addr-line>Porto</addr-line>, <country>Portugal</country></aff><aff id="aff3"><institution>Infectious Diseases and Microbiology Division, Hospital Universitario Virgen Macarena</institution>, <addr-line>Sevilla</addr-line>, <country>Spain</country></aff><aff id="aff4"><institution>Department of Medicine, University of Sevilla/Instituto de Biomedicina de Sevilla (IBiS)/Consejo Superior de Investigaciones Cient&#x00ED;ficas (CSIC)</institution>, <addr-line>Sevilla</addr-line>, <country>Spain</country></aff><aff id="aff5"><institution>Infectious Diseases Division, Diagnostic and Public Health Department, University of Verona</institution>, <addr-line>Verona</addr-line>, <country>Italy</country></aff><aff id="aff6"><institution>High Performance Computing (HPC) Department, CINECA - Consorzio Interuniversitario</institution>, <addr-line>Bologna</addr-line>, <country>Italy</country></aff><contrib-group><contrib contrib-type="editor"><name 
name-style="western"><surname>Lovis</surname><given-names>Christian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Scheibner</surname><given-names>James</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Wu</surname><given-names>Xiang</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Hammam Abu Attieh, MSc<email>hammam.abu-attieh@bih-charite.de</email></corresp></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>23</day><month>4</month><year>2024</year></pub-date><volume>12</volume><elocation-id>e49646</elocation-id><history><date date-type="received"><day>06</day><month>06</month><year>2023</year></date><date date-type="rev-recd"><day>03</day><month>10</month><year>2023</year></date><date date-type="accepted"><day>07</day><month>03</month><year>2024</year></date></history><copyright-statement>&#x00A9; Hammam Abu Attieh, Diogo Telmo Neves, Mariana Guedes, Massimo Mirandola, Chiara Dellacasa, Elisa Rossi, Fabian Prasser. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 23.4.2024. </copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2024/1/e49646"/><abstract><sec><title>Background</title><p>The SARS-CoV-2 pandemic has demonstrated once again that rapid collaborative research is essential for the future of biomedicine. Large research networks are needed to collect, share, and reuse data and biosamples to generate collaborative evidence. However, setting up such networks is often complex and time-consuming, as common tools and policies are needed to ensure interoperability and the required flows of data and samples, especially for handling personal data and the associated data protection issues. In biomedical research, pseudonymization detaches directly identifying details from biomedical data and biosamples and connects them using secure identifiers, the so-called pseudonyms. This protects privacy by design but allows the necessary linkage and reidentification.</p></sec><sec><title>Objective</title><p>Although pseudonymization is used in almost every biomedical study, there are currently no pseudonymization tools that can be rapidly deployed across many institutions. Moreover, using centralized services is often not possible, for example, when data are reused and consent for this type of data processing is lacking. 
We present the ORCHESTRA Pseudonymization Tool (OPT), developed under the umbrella of the ORCHESTRA consortium, which faced exactly these challenges when it came to rapidly establishing a large-scale research network in the context of the rapid pandemic response in Europe.</p></sec><sec sec-type="methods"><title>Methods</title><p>To overcome challenges caused by the heterogeneity of IT infrastructures across institutions, the OPT was developed based on programmable runtime environments available at practically every institution: office suites. The software is highly configurable and provides many features, from subject and biosample registration to record linkage and the printing of machine-readable codes for labeling biosample tubes. Special care has been taken to ensure that the algorithms implemented are efficient so that the OPT can be used to pseudonymize large data sets, which we demonstrate through a comprehensive evaluation.</p></sec><sec sec-type="results"><title>Results</title><p>The OPT is available for Microsoft Office and LibreOffice, so it can be deployed on Windows, Linux, and MacOS. It provides multiuser support and is configurable to meet the needs of different types of research projects. Within the ORCHESTRA research network, the OPT has been successfully deployed at 13 institutions in 11 countries in Europe and beyond. As of June 2023, the software manages data about more than 30,000 subjects and 15,000 biosamples. Over 10,000 labels have been printed. The results of our experimental evaluation show that the OPT offers practical response times for all major functionalities, pseudonymizing 100,000 subjects in 10 seconds using Microsoft Excel and in 54 seconds using LibreOffice.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Innovative solutions are needed to make the process of establishing large research networks more efficient. 
The OPT, which leverages the runtime environment of common office suites, can be used to rapidly deploy pseudonymization and biosample management capabilities across research networks. The tool is highly configurable and available as open-source software.</p></sec></abstract><kwd-group><kwd>biomedical research</kwd><kwd>research network</kwd><kwd>data sharing</kwd><kwd>data protection</kwd><kwd>privacy</kwd><kwd>pseudonymization</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>As a response to the SARS-CoV-2 pandemic, many research projects have been rapidly set up to study the virus, its impact, and possible interventions [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. This accelerated the general trend toward large collaborative networks in biomedical research [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. These are motivated by the need to generate sufficiently large data sets and collections of biosamples, which are essential for developing new methods of personalized medicine and generating real-world evidence [<xref ref-type="bibr" rid="ref5">5</xref>]. However, setting up such networks usually takes quite some time, as common tools and policies are needed to achieve interoperability and enable the required flows of data and biosamples [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. 
One area in which this challenge is frequently encountered is the handling of personal data and the related data protection issues, which can arise in all processing steps, from collection [<xref ref-type="bibr" rid="ref8">8</xref>] to sharing [<xref ref-type="bibr" rid="ref9">9</xref>] and even analysis and visualization [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Laws and regulations, such as the European Union General Data Protection Regulation (GDPR) [<xref ref-type="bibr" rid="ref11">11</xref>] or the US Health Insurance Portability and Accountability Act (HIPAA) Privacy Rule [<xref ref-type="bibr" rid="ref12">12</xref>], advocate for various strategies for the protection of personal data. In general terms, the GDPR prohibits the processing of sensitive categories of personal data, including medical data, unless consent is given. However, under certain conditions, processing is also possible without consent if technical and organizational safeguards are implemented [<xref ref-type="bibr" rid="ref13">13</xref>]. Although there is no consensus on which protection methods are best suited for use in biomedical research [<xref ref-type="bibr" rid="ref14">14</xref>], pseudonymization (also called coding or pseudo-anonymization) [<xref ref-type="bibr" rid="ref15">15</xref>] is a common strategy, which can also be used to deidentify data under the HIPAA Privacy Rule. Pseudonymization is an essential aspect of the GDPR, as it is mentioned in multiple articles, in particular as a data minimization measure [<xref ref-type="bibr" rid="ref16">16</xref>]. In this privacy-by-design approach, directly identifying data about study subjects are stored separately from biomedical data and biosamples, which are needed for scientific analyses [<xref ref-type="bibr" rid="ref17">17</xref>]. 
The link between the different types of data and assets is established through secure identifiers, the so-called pseudonyms [<xref ref-type="bibr" rid="ref18">18</xref>], which enable data linkage and allow the reidentification of subjects only if strictly necessary, for example, for follow-up data collection.</p></sec><sec id="s1-2"><title>Objective</title><p>Although pseudonymization is done in almost any biomedical study, there are currently no pseudonymization tools that can rapidly be rolled out across many institutions. Existing tools, such as the Generic Pseudonym Administration Service (gPAS) [<xref ref-type="bibr" rid="ref19">19</xref>] and Mainzelliste [<xref ref-type="bibr" rid="ref20">20</xref>], are client-server applications, requiring server components to be deployed to and integrated into the institutions&#x2019; IT infrastructures. Although this can have some important advantages (see the <italic>Limitations and Future Work</italic> section), it is usually time-consuming, for example, due to a lack of resources or efforts required to ensure compliance with local security policies. Moreover, using central services, such as the European Unified Patient Identity Management (EUPID) [<xref ref-type="bibr" rid="ref21">21</xref>], is often not an option, for example, when data should be reused and consent is missing for this type of processing [<xref ref-type="bibr" rid="ref22">22</xref>].</p><p>In this paper, we present the ORCHESTRA Pseudonymization Tool (OPT) that has been developed under the umbrella of the ORCHESTRA consortium. This project faced the challenges described in the previous paragraph when quickly establishing a large-scale research network as part of Europe&#x2019;s rapid pandemic response [<xref ref-type="bibr" rid="ref23">23</xref>]. 
Hence, the OPT has been developed with the aim of supporting (1) the registration, pseudonymization, and management of study subject identities as well as biosamples; (2) rapid rollout across research network partners; and (3) scalability and simple configurability. The objective of this paper is to describe the design and implementation of the OPT and to offer insights into its usability and scalability, as evidenced by its deployment in the ORCHESTRA research network.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>The work described in this article covers the design and implementation of a generic research tool, which involved neither research on humans or human specimens nor epidemiological research with personal data. Therefore, no approval was required according to the statutes of the Ethics Committee of the Faculty of Medicine at Charit&#x00E9; - Universit&#x00E4;tsmedizin Berlin. However, the individual studies which use the tool usually have to apply for ethics approval. For example, the COVID HOME study within the ORCHESTRA project was approved by the Medical Ethical Review Committee of the University Medical Center Groningen (UMCG) under vote number METc 2020/158.</p></sec><sec id="s2-2"><title>General Approach</title><p>The OPT has been designed to support general pseudonymization workflows that are needed in most biomedical research projects, as illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><p>When a subject is admitted to the hospital, visits a study center, or has a follow-up visit, they are enrolled in the study. In this setting, the physicians or study nurses collect directly identifying and medical data and, according to the study protocol, the appropriate biosamples. The identifying attributes are entered into the OPT to create a unique pseudonym: the OPT Subject ID. 
During the follow-up visits, the study staff can use the OPT to retrieve an existing pseudonym for a subject that was already enrolled in the study. In all downstream data collection or processing, the OPT Subject ID can be used instead of identifying data so that the medical data are protected but still linked to the study subject and across visits. In addition, biosample data can also be entered into the OPT and linked to the appropriate subject to generate 1 or more additional pseudonyms: the OPT Biosample IDs. A label can then be generated for each biosample vial, containing the OPT Biosample ID, the OPT Subject ID, and a DataMatrix code, QR code, or barcode (encoding the OPT Biosample ID) for tracking the biosample via scanners commonly used in laboratories. Study-specific information, for example, the exact information to capture for each study subject and biosample, the number and schedule of visits, and the types and schedules of biosample collections, can all be configured in the OPT. Moreover, in addition to its applicability in prospective studies, as described above, the software also supports importing existing data about subjects and biosamples that can be used in retrospective study designs.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Basic concept of the OPT. IDAT: identifying data; MDAT: medical data; PID: patient ID; PSN: subject pseudonym; PSN-S: sample pseudonym; SDAT: sample data; SID: sample ID.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e49646_fig01.png"/></fig></sec><sec id="s2-3"><title>Implementation Details</title><p>To overcome challenges caused by the heterogeneity of IT infrastructures across different institutions and a potential lack of support by IT departments due to resource constraints, the OPT has been implemented based on programmable runtime environments that are available at practically any institution: office suites. 
These suites, especially the one by Microsoft, are among the most important and widely used applications around the world and still play a key role in many sectors today. The OPT is available for Microsoft Office as an Excel application and for LibreOffice as a Calc application. The application logic has been implemented in the embedded Basic scripting language using efficient algorithms for data management. Visual Basic for Applications, which is supported by Microsoft Office, and LibreOffice Basic, which is supported by LibreOffice, share similarities but are not fully compatible with each other. In the development process of the OPT, the Excel version serves as the primary implementation, and changes as well as additions are regularly ported to the LibreOffice version to achieve feature parity.</p><p>For generating the labels for the biosample vials, the OPT is delivered together with a single-page label printing application that takes pseudonyms and metadata (eg, visit labels) as input and generates printable labels. Although this application is implemented using web technologies such as HTML, CSS, and JavaScript, it is delivered as files and can be executed locally without access to the internet. The label printing application works in any common web browser and can be called via the OPT. Properties of the labels to be printed can either be automatically transmitted via the URL for a single label or manually copied into the application via an input field for bulk printing of a larger number of labels. It is also possible to host the application on a web server. However, in this case, the URL function will be deactivated in the OPT to ensure that no data are sent to the server that hosts the application. It is important to note that the application still runs completely locally in the browser of the user, and no data ever leave the devices used to print labels. 
The pseudonyms and biosample metadata will be temporarily managed in the browser of the device.</p></sec><sec id="s2-4"><title>Specific Functionalities</title><p>In addition to study subject and biosample management, the OPT also provides import and export functionalities, statistics, and a range of configuration options. In this section, we will briefly introduce each function, whereas a structured overview can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Regarding the subject-related functions, the OPT supports individual or bulk registration and a search function for finding pseudonyms for already registered subjects. An important feature of the software is a search function, required for any new patient or sample registration, which prevents multiple registrations of the same study participant. The search, to be performed as the first step of the registration, is linked to several data quality checks as well as a fuzzy record linkage process that prevents duplicate registrations. The bulk registration functionality enables the use of the OPT for retrospective pseudonymization of existing data sets. The search function supports wildcards and fuzzy matching across a configured set of master data attributes. Additional properties for the registered individuals can be documented to account for site-specific requirements.</p><p>Biosample-related functions are designed analogously to those for study subject management. In addition, labels can be generated and printed through the service described in the previous section.</p><p>Import and export functionalities are provided to enable the creation of backups (see the next section) and the migration from old versions of the OPT as part of update processes.</p><p>Finally, separate worksheets display statistical information about the data captured, such as the number of subjects registered or pseudonyms created for different study visits. 
Extensive configuration options are also available through a separate worksheet.</p><p>All functionalities of the OPT are described briefly in an integrated Quick User Guide and in detail in a comprehensive user manual [<xref ref-type="bibr" rid="ref24">24</xref>].</p></sec><sec id="s2-5"><title>Security Considerations and Features</title><p>The data collected during study subject and biosample registration, as well as the pseudonyms generated, are sensitive and a critical part of the data managed in any study. Hence, the confidentiality, integrity, and availability [<xref ref-type="bibr" rid="ref25">25</xref>] of the data managed in the OPT must be ensured. In this context, the approach taken by the OPT clearly trades off some of the guarantees that could be provided by a client-server application against the possibility of rapid deployment and rollout. However, as described in the user manual, care has been taken to provide robust guarantees by specifying requirements on how the OPT should be deployed and used [<xref ref-type="bibr" rid="ref24">24</xref>]. First, the OPT should not be placed on a local drive but on a network share that is integrated with the institution&#x2019;s Authentication and Authorization Infrastructure and, hence, provides means for controlling who is able to access the software in read or write mode and from which devices. Second, it is highly recommended that this share be backed up regularly so that data can be restored in case of problems. This should be complemented by regular, for example, daily, manual backups through the export functionality provided by the OPT and according to reminders that are displayed by the software. Finally, the office suites used as runtime environments do not provide multiuser support, and the application can only be opened by 1 user with write permission at any point in time. To enable parallel read access, the OPT comes with a script that opens a temporary read-only copy of the software. 
This allows, for example, laboratory technicians to use the OPT for generating biosample labels in parallel with ongoing registration processes. The measures described in this section have proven to be effective, and no problems have been encountered to date during extensive use of the software at many institutions (see the <italic>Results</italic> section).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview of the Application</title><p>The graphical user interface of the OPT is divided into 10 different perspectives that provide access to the functionalities described in the previous sections. One of those sheets, the configuration sheet, is hidden from the users. All other sheets have write protection using the integrated protection functions of the spreadsheet software, except the input fields and the buttons, to ensure that data management is only performed through the specific functionalities provided by the software. A password is set by default for the write protection, which can be changed by the administrator at any time. However, it is important to keep the password safe. <xref ref-type="fig" rid="figure2">Figure 2</xref> provides an overview of 4 important perspectives.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Perspectives of the OPT for (A) configuration, (B) registration and search, (C) data overview, and (D) statistics. OPT: ORCHESTRA Pseudonymization Tool.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e49646_fig02.png"/></fig><p><xref ref-type="fig" rid="figure2">Figure 2A</xref> shows the configuration sheet, in which the specifics of the algorithm for generating pseudonyms, the study schedule, and the data fields to be documented can be specified. 
<xref ref-type="fig" rid="figure2">Figure 2B</xref> shows the interface provided for searching and registering subjects, with a search form on the left side of the sheet and a results list on the right side. All study subject data stored in the OPT are listed in the sheet shown in <xref ref-type="fig" rid="figure2">Figure 2C</xref>. This sheet also allows users to document any additional data that a site may require. Finally, <xref ref-type="fig" rid="figure2">Figure 2D</xref> shows a sheet providing statistical information on the number of subjects and biosamples registered, as well as insights into how these numbers have developed over time.</p><p>An overview of the label printing application is provided in <xref ref-type="fig" rid="figure3">Figure 3</xref>. As shown in the figure, the data that are to be printed on the labels are listed, and the number of rows and columns can be configured to support printing in bulk or for individual labels. The figure also shows an example of a sheet that can be printed and a detailed image of a single label. The data that are printed on those labels include the biosample and study subject IDs, the associated visit of the study schedule, and the biosample type.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Overview of the label printing application. OPT: ORCHESTRA Pseudonymization Tool.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e49646_fig03.png"/></fig></sec><sec id="s3-2"><title>Use of the OPT in the ORCHESTRA Project</title><p>ORCHESTRA is a 3-year international research project about the COVID-19 pandemic that was established in December 2020, involving 26 partners from 15 countries. 
The aim of ORCHESTRA is to share and analyze data from several retrospective and prospective studies to provide rigorous evidence for improving the prevention and treatment of COVID-19 and to better prepare for future pandemics [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>].</p><p>The data management architecture in ORCHESTRA consists of 3 layers that build upon each other. The first layer is formed by &#x201C;National Data Providers,&#x201D; which consist of the participating partners (universities, hospitals, and research networks). These provide the subject data and samples for joint analyses. On the second layer, &#x201C;National Hubs&#x201D; pool pseudonymized data in national instances of the Research Electronic Data Capture (REDCap) system [<xref ref-type="bibr" rid="ref28">28</xref>]. Finally, the &#x201C;ORCHESTRA Data Portal&#x201D; forms the third layer, in which access to aggregated data and results is provided through a central repository.</p><p>In ORCHESTRA, the OPT was used for implementing pseudonymization at the data providers&#x2019; sites. Each participating site named 1 or 2 persons responsible for technical aspects, such as setting up the required network share and installing updates, as well as several study nurses or clinicians, who would use the OPT. With these users, we performed regular training sessions and provided contact details in case of questions. As of June 2023, 19 instances of the OPT have been rolled out to 13 sites in 11 countries, including Germany, France, Italy, and Slovakia in Europe; Congo in Africa; and Argentina in South America. A world map highlighting all the countries in which the OPT has been rolled out can be found in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><p>On average, each instance of the OPT was used by up to 4 staff members. 
The OPT has been successfully rolled out, used, and maintained at large sites with committed IT departments, as well as at smaller, resource-constrained institutions. Overall, it has been in constant production use for more than 2 years. In the majority of the sites (10/13, 77%), the OPT Microsoft Excel version was used, whereas the remaining sites (3/13, 23%) used the LibreOffice release. In total, more than 30,000 study subjects and 15,000 samples have been registered in the OPT across all sites, and more than 10,000 labels have been printed. To evaluate the usability of the OPT, we conducted a survey among all active users, leveraging the widespread System Usability Scale [<xref ref-type="bibr" rid="ref29">29</xref>] questionnaire, which includes 10 Likert-scale questions. During this survey, our system was designed to prevent multiple responses from individual participants and the submission of incomplete responses. We received 6 responses from 9 invited users, resulting in a score of 75 on a scale from 0 to 100, which adjectively translates to &#x201C;good&#x201D; [<xref ref-type="bibr" rid="ref30">30</xref>].</p></sec><sec id="s3-3"><title>Performance Evaluation</title><p>As mentioned, the OPT has been carefully designed to provide acceptable performance, even when large data sets are being processed or a large number of subjects or samples are being managed. In this section, we present the results of a brief performance evaluation. Our test environment consisted of an average office laptop, which was equipped with a quad-core 1.8 GHz Intel Core i7 CPU and a 64-bit Microsoft Windows 10 operating system. On top of it, Microsoft Excel 2016 (x32) and LibreOffice 7.0 (x64) were installed. 
<xref ref-type="fig" rid="figure4">Figure 4</xref> provides an overview of the execution times of the most important functionalities of the OPT for different cohort sizes.</p><p>The numbers clearly show that the OPT works well and provides excellent performance for small or medium-sized data sets and acceptable performance for large data sets.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Execution times of the most important operations of the ORCHESTRA Pseudonymization Tool: (A) import, (B) registration, and (C) search.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e49646_fig04.png"/></fig><p><xref ref-type="fig" rid="figure4">Figure 4A</xref> shows the average execution times for importing data about study subjects and samples. Data about subjects were imported into a completely empty OPT, whereas data about samples were imported into an OPT that already had the corresponding study subjects registered, so that each biosample was assigned to exactly 1 subject. For example, importing the data of 100,000 subjects took about 10 seconds in the Excel version and 54 seconds in the LibreOffice version. During the registration, the existence of the associated study subject in the OPT is checked, which makes the registration of samples slower compared to the registration of subjects. This is also noticeable in <xref ref-type="fig" rid="figure4">Figure 4B</xref>, which shows the average execution times for registering a single study subject or sample. As can be seen, using an OPT data set in which 100,000 entities were already registered, this took between 2 and 4 seconds in the Excel version and between 4 and 6 seconds in the LibreOffice version. 
<xref ref-type="fig" rid="figure4">Figure 4C</xref> shows the average execution times for searching for entities and obtaining their pseudonym, which is roughly twice as fast as the registration operation.</p><p>As execution times increase linearly with the number of entities already managed, subsecond response times can be expected for instances in which around 15,000 or fewer subjects or samples have been registered. This is consistent with our experiences from the deployments in the ORCHESTRA research network.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this paper, we presented the OPT, a comprehensive, scalable, and pragmatic pseudonymization tool that can be rapidly rolled out across large research networks. To achieve this, the software has been implemented based on runtime environments that are available at practically any institution: office suites. The software supports a broad range of functionalities, from registering and pseudonymizing subject and biosample identities to search and depseudonymization functions, statistics about the data managed, as well as import and export features. We have described measures that are recommended to ensure the security of the data managed by the OPT and reported on our experiences gained after 2 years of successful operation in a large research network on COVID-19. Finally, we have also presented the results of a performance evaluation showing that the software provides excellent performance for small or medium-sized data sets and acceptable performance for large data sets. The OPT is available as open-source software [<xref ref-type="bibr" rid="ref31">31</xref>] and can be configured to meet the needs of a wide range of biomedical research projects.</p></sec><sec id="s4-2"><title>Limitations and Future Work</title><p>To achieve the design goals of the OPT, some compromises had to be made regarding data management. 
Compared to using client-server applications that use database management systems to store data, it is more difficult to ensure the confidentiality, integrity, and availability of the data managed with the OPT. There is also limited support for multiuser scenarios. However, we have developed and documented a set of measures that, if taken, help to still ensure a high level of data security. For this to work, it is important that users adhere to those recommendations. Therefore, all users of the OPT should familiarize themselves with the manual [<xref ref-type="bibr" rid="ref24">24</xref>], and ideally, they should also be trained in the use and operation of the software. Despite these limitations, we strongly believe that our approach offers an innovative take on pseudonymization tools that can rapidly be rolled out across large research networks. Of course, it would be even more desirable if global standards for pseudonymization functions could be developed and agreed upon. Such global standards would ensure that solutions already existing at many research institutions are interoperable and can readily be used in joint research activities.</p></sec><sec id="s4-3"><title>Comparison With Related Work</title><p>A range of pseudonymization tools have been described in the literature and are available as open-source software. However, they are either based on a client-server architecture and hence require quite some effort to be rolled out across sites, based on central services and hence not usable if consent is lacking for this type of processing, or offered as command-line utilities or programming libraries for IT experts.</p><p>Examples of client-server approaches include the work by Lablans et al [<xref ref-type="bibr" rid="ref20">20</xref>] to provide a RESTful interface to pseudonymization services in modern web applications, which is based on a concept suggested by Pommerening et al [<xref ref-type="bibr" rid="ref6">6</xref>] in 2006. 
Moreover, researchers from the University of Greifswald in Germany have designed and developed several client-server tools that can be used to manage subjects, samples, and other aspects of biomedical studies [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>Examples of central services for pseudonymization include the EUPID, which was developed in 2014 by the Austrian Institute of Technology for the European Network for Cancer Research in Children and Adolescents project [<xref ref-type="bibr" rid="ref21">21</xref>]. Another example is the Secure Privacy-preserving Identity management in Distributed Environments for Research (SPIDER) service, which was launched in May 2022 by the Joint Research Centre [<xref ref-type="bibr" rid="ref34">34</xref>]. Both services support linking and transferring subject data across registries without revealing their identities. However, biosample data management is not possible with them. Further centralized concepts include the one described by Angelow et al [<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>Examples of command-line utilities, application programming interfaces, and programming libraries include the generic solution for record linkage of special categories of personal data developed by Fischer et al [<xref ref-type="bibr" rid="ref36">36</xref>]; that by Preciado-Marquez et al [<xref ref-type="bibr" rid="ref37">37</xref>]; and the PID (patient ID) generator developed by the TMF (Technologies, Methods and Infrastructure for Networked Medical Research e.V.), the German umbrella association for networked medical research [<xref ref-type="bibr" rid="ref6">6</xref>].</p></sec><sec id="s4-4"><title>Conclusion</title><p>Widely available office suites provide runtime environments that offer opportunities to rapidly roll out software components for biomedical studies across a wide range of large and resource-constrained research institutions. 
We have demonstrated this through the development, practical use, and evaluation of the OPT, which offers pseudonymization functionalities for study subjects and biosamples. As we believe that the software is of interest to the larger research community, it has been made available under a permissive open-source license [<xref ref-type="bibr" rid="ref31">31</xref>].</p></sec></sec></body><back><ack><p>This work has been funded by the European Union&#x2019;s Horizon 2020 research and innovation programme under the project ORCHESTRA (grant agreement 101016167).</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">EUPID</term><def><p>European Unified Patient Identity Management</p></def></def-item><def-item><term id="abb2">GDPR</term><def><p>General Data Protection Regulation</p></def></def-item><def-item><term id="abb3">gPAS</term><def><p>Generic Pseudonym Administration Service</p></def></def-item><def-item><term id="abb4">HIPAA</term><def><p>Health Insurance Portability and Accountability Act</p></def></def-item><def-item><term id="abb5">OPT</term><def><p>ORCHESTRA Pseudonymization Tool</p></def></def-item><def-item><term id="abb6">PID</term><def><p>patient ID</p></def></def-item><def-item><term id="abb7">REDCap</term><def><p>Research Electronic Data Capture</p></def></def-item><def-item><term id="abb8">SPIDER</term><def><p>Secure Privacy-preserving Identity management in Distributed Environments for Research</p></def></def-item><def-item><term id="abb9">SUS</term><def><p>System Usability Scale</p></def></def-item><def-item><term id="abb10">TMF</term><def><p>Technologies, Methods and Infrastructure for Networked Medical Research e.V.</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Dron</surname><given-names>L</given-names></name><name name-style="western"><surname>Dillman</surname><given-names>A</given-names></name><name name-style="western"><surname>Zoratti</surname><given-names>MJ</given-names></name><name name-style="western"><surname>Haggstrom</surname><given-names>J</given-names></name><name name-style="western"><surname>Mills</surname><given-names>EJ</given-names></name><name name-style="western"><surname>Park</surname><given-names>JJH</given-names></name></person-group><article-title>Clinical trial data sharing for COVID-19-related research</article-title><source>J Med Internet Res</source><year>2021</year><month>03</month><day>12</day><volume>23</volume><issue>3</issue><fpage>e26718</fpage><pub-id pub-id-type="doi">10.2196/26718</pub-id><pub-id pub-id-type="medline">33684053</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="other"><person-group person-group-type="author"><collab>R&#x0026;D Blueprint</collab></person-group><article-title>A coordinated global research roadmap: 2019 novel coronavirus</article-title><year>2020</year><month>03</month><day>12</day><access-date>2024-04-12</access-date><publisher-name>World Health Organization</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/publications/m/item/a-coordinated-global-research-roadmap">https://www.who.int/publications/m/item/a-coordinated-global-research-roadmap</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guinney</surname><given-names>J</given-names></name><name name-style="western"><surname>Saez-Rodriguez</surname><given-names>J</given-names></name></person-group><article-title>Alternative models for sharing confidential biomedical data</article-title><source>Nat 
Biotechnol</source><year>2018</year><month>05</month><day>9</day><volume>36</volume><issue>5</issue><fpage>391</fpage><lpage>392</lpage><pub-id pub-id-type="doi">10.1038/nbt.4128</pub-id><pub-id pub-id-type="medline">29734317</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Walport</surname><given-names>M</given-names></name><name name-style="western"><surname>Brest</surname><given-names>P</given-names></name></person-group><article-title>Sharing research data to improve public health</article-title><source>Lancet</source><year>2011</year><month>02</month><day>12</day><volume>377</volume><issue>9765</issue><fpage>537</fpage><lpage>539</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(10)62234-9</pub-id><pub-id pub-id-type="medline">21216456</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mahmoud</surname><given-names>A</given-names></name><name name-style="western"><surname>Ahlborn</surname><given-names>B</given-names></name><name name-style="western"><surname>Mansmann</surname><given-names>U</given-names></name><name name-style="western"><surname>Reinhardt</surname><given-names>I</given-names></name></person-group><article-title>Clientside pseudonymization with trusted third-party using modern web technology</article-title><source>Stud Health Technol Inform</source><year>2021</year><month>05</month><day>27</day><volume>281</volume><fpage>496</fpage><lpage>497</lpage><pub-id pub-id-type="doi">10.3233/SHTI210212</pub-id><pub-id pub-id-type="medline">34042618</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Pommerening</surname><given-names>K</given-names></name><name 
name-style="western"><surname>Schr&#x00F6;der</surname><given-names>M</given-names></name><name name-style="western"><surname>Petrov</surname><given-names>D</given-names></name><name name-style="western"><surname>Schl&#x00F6;sser-Fa&#x00DF;bender</surname><given-names>M</given-names></name><name name-style="western"><surname>Semler</surname><given-names>SC</given-names></name><name name-style="western"><surname>Drepper</surname><given-names>J</given-names></name></person-group><article-title>Pseudonymization service and data custodians in medical research networks and biobanks</article-title><source>INFORMATIK 2006 &#x2013; INFORMATIK f&#x00FC;r Menschen</source><year>2006</year><volume>1</volume><publisher-name>Gesellschaft f&#x00FC;r Informatik e.V</publisher-name><fpage>715</fpage><lpage>721</lpage><pub-id pub-id-type="other">978-3-88579-187-4</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tacconelli</surname><given-names>E</given-names></name><name name-style="western"><surname>Gorska</surname><given-names>A</given-names></name><name name-style="western"><surname>Carrara</surname><given-names>E</given-names></name><etal/></person-group><article-title>Challenges of data sharing in European COVID-19 projects: a learning opportunity for advancing pandemic preparedness and response</article-title><source>Lancet Reg Health Eur</source><year>2022</year><month>10</month><volume>21</volume><fpage>100467</fpage><pub-id pub-id-type="doi">10.1016/j.lanepe.2022.100467</pub-id><pub-id pub-id-type="medline">35942201</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rumbold</surname><given-names>J</given-names></name><name 
name-style="western"><surname>Pierscionek</surname><given-names>B</given-names></name></person-group><article-title>Contextual anonymization for secondary use of big data in biomedical research: proposal for an anonymization matrix</article-title><source>JMIR Med Inform</source><year>2018</year><month>11</month><day>22</day><volume>6</volume><issue>4</issue><fpage>e47</fpage><pub-id pub-id-type="doi">10.2196/medinform.7096</pub-id><pub-id pub-id-type="medline">30467101</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aamot</surname><given-names>H</given-names></name><name name-style="western"><surname>Kohl</surname><given-names>CD</given-names></name><name name-style="western"><surname>Richter</surname><given-names>D</given-names></name><name name-style="western"><surname>Knaup-Gregori</surname><given-names>P</given-names></name></person-group><article-title>Pseudonymization of patient identifiers for translational research</article-title><source>BMC Med Inform Decis Mak</source><year>2013</year><month>07</month><day>24</day><volume>13</volume><fpage>75</fpage><pub-id pub-id-type="doi">10.1186/1472-6947-13-75</pub-id><pub-id pub-id-type="medline">23883409</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>X</given-names></name><name name-style="western"><surname>Wang</surname><given-names>H</given-names></name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names></name><name name-style="western"><surname>Li</surname><given-names>R</given-names></name></person-group><article-title>A secure visual framework for multi-index protection evaluation in networks</article-title><source>Digit Commun 
Netw</source><year>2023</year><month>04</month><volume>9</volume><issue>2</issue><fpage>327</fpage><lpage>336</lpage><pub-id pub-id-type="doi">10.1016/j.dcan.2022.05.007</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="web"><article-title>Regulation (EU) 2016/679 of the European Parliament and of the Council</article-title><source>Official Journal of the European Union</source><year>2016</year><month>04</month><day>27</day><access-date>2024-04-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016R0679">https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016R0679</ext-link></comment></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>U.S. Department of Health and Human Services, Office for Civil Rights</collab></person-group><article-title>HIPAA administrative simplification: regulation text: 45 CFR parts 160, 162, and 164 (unofficial version, as amended through March 26, 2013)</article-title><source>U.S. 
Department of Health and Human Services</source><year>2013</year><month>03</month><day>26</day><access-date>2024-04-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.hhs.gov/sites/default/files/hipaa-simplification-201303.pdf">https://www.hhs.gov/sites/default/files/hipaa-simplification-201303.pdf</ext-link></comment></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quinn</surname><given-names>P</given-names></name></person-group><article-title>Research under the GDPR - a level playing field for public and private sector research?</article-title><source>Life Sci Soc Policy</source><year>2021</year><month>03</month><day>1</day><volume>17</volume><issue>1</issue><fpage>4</fpage><pub-id pub-id-type="doi">10.1186/s40504-021-00111-z</pub-id><pub-id pub-id-type="medline">33648586</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rodriguez</surname><given-names>A</given-names></name><name name-style="western"><surname>Tuck</surname><given-names>C</given-names></name><name name-style="western"><surname>Dozier</surname><given-names>MF</given-names></name><etal/></person-group><article-title>Current recommendations/practices for anonymising data from clinical trials in order to make it available for sharing: a scoping review</article-title><source>Clin Trials</source><year>2022</year><month>08</month><volume>19</volume><issue>4</issue><fpage>452</fpage><lpage>463</lpage><pub-id pub-id-type="doi">10.1177/17407745221087469</pub-id><pub-id pub-id-type="medline">35730910</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kohlmayer</surname><given-names>F</given-names></name><name 
name-style="western"><surname>Lautenschl&#x00E4;ger</surname><given-names>R</given-names></name><name name-style="western"><surname>Prasser</surname><given-names>F</given-names></name></person-group><article-title>Pseudonymization for research data collection: is the juice worth the squeeze?</article-title><source>BMC Med Inform Decis Mak</source><year>2019</year><month>09</month><day>4</day><volume>19</volume><issue>1</issue><fpage>178</fpage><pub-id pub-id-type="doi">10.1186/s12911-019-0905-x</pub-id><pub-id pub-id-type="medline">31484555</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gruschka</surname><given-names>N</given-names></name><name name-style="western"><surname>Mavroeidis</surname><given-names>V</given-names></name><name name-style="western"><surname>Vishi</surname><given-names>K</given-names></name><name name-style="western"><surname>Jensen</surname><given-names>M</given-names></name></person-group><article-title>Privacy issues and data protection in big data: a case study analysis under GDPR</article-title><conf-name>2018 IEEE International Conference on Big Data (Big Data)</conf-name><conf-date>Dec 10 to 13, 2018</conf-date><conf-loc>Seattle, WA</conf-loc><fpage>5027</fpage><lpage>5033</lpage><pub-id pub-id-type="doi">10.1109/BigData.2018.8622621</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lautenschl&#x00E4;ger</surname><given-names>R</given-names></name><name name-style="western"><surname>Kohlmayer</surname><given-names>F</given-names></name><name name-style="western"><surname>Prasser</surname><given-names>F</given-names></name><name name-style="western"><surname>Kuhn</surname><given-names>KA</given-names></name></person-group><article-title>A generic solution for web-based management of 
pseudonymized data</article-title><source>BMC Med Inform Decis Mak</source><year>2015</year><month>11</month><day>30</day><volume>15</volume><fpage>100</fpage><pub-id pub-id-type="doi">10.1186/s12911-015-0222-y</pub-id><pub-id pub-id-type="medline">26621059</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="other"><person-group person-group-type="author"><collab>European Union Agency for Cybersecurity</collab><name name-style="western"><surname>Drogkaris</surname><given-names>P</given-names></name><name name-style="western"><surname>Bourka</surname><given-names>A</given-names></name></person-group><article-title>Recommendations on shaping technology according to GDPR provisions - an overview on data pseudonymisation</article-title><year>2018</year><publisher-name>European Network and Information Security Agency</publisher-name><pub-id pub-id-type="doi">10.2824/74954</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bialke</surname><given-names>M</given-names></name><name name-style="western"><surname>Bahls</surname><given-names>T</given-names></name><name name-style="western"><surname>Havemann</surname><given-names>C</given-names></name><etal/></person-group><article-title>MOSAIC--a modular approach to data management in epidemiological studies</article-title><source>Methods Inf Med</source><year>2015</year><volume>54</volume><issue>4</issue><fpage>364</fpage><lpage>371</lpage><pub-id pub-id-type="doi">10.3414/ME14-01-0133</pub-id><pub-id pub-id-type="medline">26196494</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lablans</surname><given-names>M</given-names></name><name name-style="western"><surname>Borg</surname><given-names>A</given-names></name><name 
name-style="western"><surname>&#x00DC;ckert</surname><given-names>F</given-names></name></person-group><article-title>A RESTful interface to pseudonymization services in modern web applications</article-title><source>BMC Med Inform Decis Mak</source><year>2015</year><month>02</month><day>7</day><volume>15</volume><fpage>2</fpage><pub-id pub-id-type="doi">10.1186/s12911-014-0123-5</pub-id><pub-id pub-id-type="medline">25656224</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nitzlnader</surname><given-names>M</given-names></name><name name-style="western"><surname>Schreier</surname><given-names>G</given-names></name></person-group><article-title>Patient identity management for secondary use of biomedical research data in a distributed computing environment</article-title><source>Stud Health Technol Inform</source><year>2014</year><volume>198</volume><fpage>211</fpage><lpage>218</lpage><pub-id pub-id-type="medline">24825705</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El Emam</surname><given-names>K</given-names></name><name name-style="western"><surname>Rodgers</surname><given-names>S</given-names></name><name name-style="western"><surname>Malin</surname><given-names>B</given-names></name></person-group><article-title>Anonymising and sharing individual patient data</article-title><source>BMJ</source><year>2015</year><month>03</month><day>20</day><volume>350</volume><fpage>h1139</fpage><pub-id pub-id-type="doi">10.1136/bmj.h1139</pub-id><pub-id pub-id-type="medline">25794882</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><article-title>Connecting European cohorts to increase common and effective response to SARS-CoV-2 pandemic: 
ORCHESTRA</article-title><source>European Commission</source><year>2022</year><month>04</month><day>21</day><access-date>2023-06-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cordis.europa.eu/project/id/101016167/de">https://cordis.europa.eu/project/id/101016167/de</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><article-title>BIH-MI/opt: ORCHESTRA pseudonymization tool - user manual</article-title><source>GitHub</source><year>2023</year><month>09</month><day>24</day><access-date>2023-09-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/BIH-MI/opt/blob/main/development/documentation/user-manual.pdf">https://github.com/BIH-MI/opt/blob/main/development/documentation/user-manual.pdf</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="other"><article-title>ISO/IEC 27001:2022 information security, cybersecurity and privacy protection - information security management systems - requirements</article-title><year>2022</year><access-date>2024-04-12</access-date><publisher-name>International Organization for Standardization</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.iso.org/standard/27001">https://www.iso.org/standard/27001</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Azzini</surname><given-names>AM</given-names></name><name name-style="western"><surname>Canziani</surname><given-names>LM</given-names></name><name name-style="western"><surname>Davis</surname><given-names>RJ</given-names></name><etal/></person-group><article-title>How European research projects can support vaccination strategies: the case of the ORCHESTRA project for SARS-CoV-2</article-title><source>Vaccines 
(Basel)</source><year>2023</year><month>08</month><day>14</day><volume>11</volume><issue>8</issue><fpage>1361</fpage><pub-id pub-id-type="doi">10.3390/vaccines11081361</pub-id><pub-id pub-id-type="medline">37631929</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>ORCHESTRA - EU horizon 2020 cohort to tackle COVID-19 internationally</article-title><source>ORCHESTRA</source><year>2022</year><month>09</month><day>19</day><access-date>2023-04-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://orchestra-cohort.eu/">https://orchestra-cohort.eu/</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harris</surname><given-names>PA</given-names></name><name name-style="western"><surname>Taylor</surname><given-names>R</given-names></name><name name-style="western"><surname>Thielke</surname><given-names>R</given-names></name><name name-style="western"><surname>Payne</surname><given-names>J</given-names></name><name name-style="western"><surname>Gonzalez</surname><given-names>N</given-names></name><name name-style="western"><surname>Conde</surname><given-names>JG</given-names></name></person-group><article-title>Research electronic data capture (REDCap)--a metadata-driven methodology and workflow process for providing translational research informatics support</article-title><source>J Biomed Inform</source><year>2009</year><month>04</month><volume>42</volume><issue>2</issue><fpage>377</fpage><lpage>381</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2008.08.010</pub-id><pub-id pub-id-type="medline">18929686</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name 
name-style="western"><surname>Brooke</surname><given-names>J</given-names></name></person-group><article-title>SUS: a quick and dirty usability scale</article-title><source>Usability Evaluation in Industry</source><year>1996</year><publisher-name>CRC Press</publisher-name><fpage>189</fpage><lpage>194</lpage></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bangor</surname><given-names>A</given-names></name><name name-style="western"><surname>Kortum</surname><given-names>P</given-names></name><name name-style="western"><surname>Miller</surname><given-names>J</given-names></name></person-group><article-title>Determining what individual SUS scores mean: adding an adjective rating scale</article-title><source>J Usability Stud</source><year>2009</year><month>05</month><access-date>2024-04-12</access-date><volume>4</volume><issue>3</issue><fpage>114</fpage><lpage>123</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://uxpajournal.org/wp-content/uploads/sites/7/pdf/JUS_Bangor_May2009.pdf">https://uxpajournal.org/wp-content/uploads/sites/7/pdf/JUS_Bangor_May2009.pdf</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>BIH-MI/opt: ORCHESTRA pseudonymization tool</article-title><source>GitHub</source><year>2023</year><month>06</month><day>2</day><access-date>2023-06-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/BIH-MI/opt">https://github.com/BIH-MI/opt</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="thesis"><person-group person-group-type="author"><name name-style="western"><surname>Bialke</surname><given-names>M</given-names></name></person-group><source>Werkzeuggest&#x00FC;tzte Verfahren f&#x00FC;r die Realisierung einer Treuhandstelle im Rahmen des zentralen Datenmanagements in 
der epidemiologischen Forschung [Dissertation]</source><year>2016</year><access-date>2024-04-12</access-date><publisher-name>Universit&#x00E4;tsmedizin der Ernst-Moritz-Arndt-Universit&#x00E4;t Greifswald</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://d-nb.info/1124566945/34">https://d-nb.info/1124566945/34</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bialke</surname><given-names>M</given-names></name><name name-style="western"><surname>Penndorf</surname><given-names>P</given-names></name><name name-style="western"><surname>Wegner</surname><given-names>T</given-names></name><etal/></person-group><article-title>A workflow-driven approach to integrate generic software modules in a trusted third party</article-title><source>J Transl Med</source><year>2015</year><month>06</month><day>4</day><volume>13</volume><fpage>176</fpage><pub-id pub-id-type="doi">10.1186/s12967-015-0545-6</pub-id><pub-id pub-id-type="medline">26040848</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><article-title>SPIDER pseudonymisation tool</article-title><source>European Commission</source><year>2023</year><month>05</month><day>4</day><access-date>2023-06-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://eu-rd-platform.jrc.ec.europa.eu/spider/">https://eu-rd-platform.jrc.ec.europa.eu/spider/</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Angelow</surname><given-names>A</given-names></name><name name-style="western"><surname>Schmidt</surname><given-names>M</given-names></name><name name-style="western"><surname>Weitmann</surname><given-names>K</given-names></name><etal/></person-group><article-title>Methods and 
implementation of a central biosample and data management in a three-centre clinical study</article-title><source>Comput Methods Programs Biomed</source><year>2008</year><month>07</month><volume>91</volume><issue>1</issue><fpage>82</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1016/j.cmpb.2008.02.002</pub-id><pub-id pub-id-type="medline">18406002</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Fischer</surname><given-names>H</given-names></name><name name-style="western"><surname>R&#x00F6;hrig</surname><given-names>R</given-names></name><name name-style="western"><surname>Thiemann</surname><given-names>VS</given-names></name></person-group><article-title>Simple Batch Record Linkage System (SimBa) &#x2013; a generic tool for record linkage of special categories of personal data in small networked research projects with distributed data sources: lessons learned from the Inno_RD project</article-title><source>Deutsche Gesellschaft f&#x00FC;r Medizinische Informatik, Biometrie und Epidemiologie. 64. Jahrestagung der Deutschen Gesellschaft f&#x00FC;r Medizinische Informatik, Biometrie und Epidemiologie e. V. 
(GMDS)</source><year>2019</year><publisher-name>German Medical Science GMS Publishing House</publisher-name><pub-id pub-id-type="doi">10.3205/19gmds118</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Preciado-Marquez</surname><given-names>D</given-names></name><name name-style="western"><surname>Becker</surname><given-names>L</given-names></name><name name-style="western"><surname>Storck</surname><given-names>M</given-names></name><name name-style="western"><surname>Greulich</surname><given-names>L</given-names></name><name name-style="western"><surname>Dugas</surname><given-names>M</given-names></name><name name-style="western"><surname>Brix</surname><given-names>TJ</given-names></name></person-group><article-title>MainzelHandler: a library for a simple integration and usage of the Mainzelliste</article-title><source>Stud Health Technol Inform</source><year>2021</year><month>05</month><day>27</day><volume>281</volume><fpage>233</fpage><lpage>237</lpage><pub-id pub-id-type="doi">10.3233/SHTI210155</pub-id><pub-id pub-id-type="medline">34042740</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Overview of the ORCHESTRA Pseudonymization Tool functions.</p><media xlink:href="medinform_v12i1e49646_app1.png" xlink:title="PNG File, 233 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Map of countries in which the ORCHESTRA Pseudonymization Tool has been rolled out.</p><media xlink:href="medinform_v12i1e49646_app2.png" xlink:title="PNG File, 229 KB"/></supplementary-material></app-group></back></article>