<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e73605</article-id><article-id pub-id-type="doi">10.2196/73605</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Large Language Model Versus Manual Review for Clinical Data Curation in Breast Cancer: Retrospective Comparative Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Kang</surname><given-names>Young-Joon</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Hocheol</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yi</surname><given-names>Jae Pak</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kim</surname><given-names>Hyobin</given-names></name><degrees>MD</degrees><xref 
ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yoon</surname><given-names>Chang Ik</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Baek</surname><given-names>Jong Min</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kim</surname><given-names>Yong-seok</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Jeon</surname><given-names>Ye Won</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Rhu</surname><given-names>Jiyoung</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lim</surname><given-names>Su Hyun</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Choi</surname><given-names>Hoon</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Oh</surname><given-names>Se Jeong</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Surgery, College of Medicine, The Catholic University of Korea, Incheon St Mary's Hospital</institution><addr-line>56, Dongsu-ro, Bupyeong-gu</addr-line><addr-line>Incheon</addr-line><country>Republic of Korea</country></aff><aff id="aff2"><institution>Department of AI Health Information Management, Yonsei University 
(Mirae)</institution><addr-line>Wonju</addr-line><country>Republic of Korea</country></aff><aff id="aff3"><institution>Department of Surgery, College of Medicine, The Catholic University of Korea, Seoul St Mary's Hospital</institution><addr-line>Seoul</addr-line><country>Republic of Korea</country></aff><aff id="aff4"><institution>Department of Surgery, College of Medicine, The Catholic University of Korea, Yeouido St Mary's Hospital</institution><addr-line>Seoul</addr-line><country>Republic of Korea</country></aff><aff id="aff5"><institution>Department of Surgery, College of Medicine, The Catholic University of Korea, Uijeongbu St Mary's Hospital</institution><addr-line>Uijeongbu</addr-line><country>Republic of Korea</country></aff><aff id="aff6"><institution>Department of Surgery, College of Medicine, The Catholic University of Korea, St Vincent's Hospital</institution><addr-line>Suwon</addr-line><country>Republic of Korea</country></aff><aff id="aff7"><institution>Department of Surgery, College of Medicine, The Catholic University of Korea, Bucheon St Mary's Hospital</institution><addr-line>Bucheon</addr-line><country>Republic of Korea</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Khanteymoori</surname><given-names>Alireza</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Garc&#x00ED;a-Barrag&#x00E1;n</surname><given-names>&#x00C1;lvaro</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Young-Joon Kang, MD, PhD, Department of Surgery, College of Medicine, The Catholic University of Korea, Incheon St Mary's Hospital, 56, Dongsu-ro, Bupyeong-gu, Incheon, 21431, Republic of Korea, 01026383847; <email>yjkang.md@gmail.com</email></corresp></author-notes><pub-date 
pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>6</day><month>11</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e73605</elocation-id><history><date date-type="received"><day>07</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>06</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>07</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Young-Joon Kang, Hocheol Lee, Jae Pak Yi, Hyobin Kim, Chang Ik Yoon, Jong Min Baek, Yong-seok Kim, Ye Won Jeon, Jiyoung Rhu, Su Hyun Lim, Hoon Choi, Se Jeong Oh. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 6.11.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e73605"/><abstract><sec><title>Background</title><p>Manual review of electronic health records for clinical research is labor-intensive and prone to reviewer-dependent variations. 
Large language models (LLMs) offer potential for automated clinical data extraction; however, their feasibility in surgical oncology remains underexplored.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the feasibility and accuracy of LLM-based processing compared with manual physician review for extracting clinical data from breast cancer records.</p></sec><sec sec-type="methods"><title>Methods</title><p>We conducted a retrospective comparative study analyzing breast cancer records from 5 academic hospitals (January 2019-December 2019). Two data extraction pathways were compared: (1) manual physician review with direct electronic health record access (group 1: 1366/3100, 44.06%) and (2) LLM-based processing using Claude 3.5 Sonnet (Anthropic) on deidentified data automatically extracted through a clinical data warehouse platform (group 2: 1734/3100, 55.94%). The automated extraction system provided prestructured, deidentified data sheets organized by clinical domains, which were then processed by the LLM. The LLM prompt was developed through a 3-phase iterative process over 2 days. Primary outcomes included missing value rates, extraction accuracy, and concordance between groups. Secondary outcomes included comparison with the Korean Breast Cancer Society national registry data, processing time, and resource use. Validation involved 50 stratified random samples per group (900 data points each), assessed by 4 breast surgical oncologists. Statistical analysis included chi-square tests, 2-tailed <italic>t</italic> tests, Cohen &#x03BA;, and intraclass correlation coefficients. The accuracy threshold was set at 90%.</p></sec><sec sec-type="results"><title>Results</title><p>The LLM achieved 90.8% (817/900) accuracy in validation analysis. 
Missing data patterns differed between groups: group 2 showed better lymph node documentation (missing: 152/1734, 8.76% vs 294/1366, 21.52%) but higher missing rates for cancer staging (211/1734, 12.17% vs 43/1366, 3.15%). Both groups demonstrated similar breast-conserving surgery rates (1107/1734, 63.84% vs 868/1366, 63.54%). Processing efficiency differed substantially: LLM processing required 12 days with 2 physicians versus 7 months with 5 physicians for manual review, representing a 91% reduction in physician hours (96 h vs 1025 h). The LLM group captured significantly more survival events (41 vs 11; <italic>P</italic>=.002). Stage distribution in the LLM group aligned better with national registry data (Cram&#x00E9;r V=0.03 vs 0.07). Application programming interface costs totaled US $260 for 1734 cases (US $0.15 per case).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>LLM-based curation of automatically extracted, deidentified clinical data demonstrated comparable effectiveness to manual physician review while reducing processing time by 95% and physician hours by 91%. This 2-step approach&#x2014;automated data extraction followed by LLM curation&#x2014;addresses both privacy concerns and efficiency needs. Despite limitations in integrating multiple clinical events, this methodology offers a scalable solution for clinical data extraction in oncology research. 
The 90.8% accuracy rate and superior capture of survival events suggest that combining automated data extraction systems with LLM processing can accelerate retrospective clinical research while maintaining data quality and patient privacy.</p></sec></abstract><kwd-group><kwd>natural language processing</kwd><kwd>breast neoplasms</kwd><kwd>data mining</kwd><kwd>clinical oncology</kwd><kwd>large language model</kwd><kwd>artificial intelligence</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Recent advances in artificial intelligence, particularly in large language models (LLMs), have demonstrated remarkable capabilities of automated data extraction and organization from complex clinical documents [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. These artificial intelligence&#x2013;driven approaches can be used to process large volumes of clinical data with a consistent methodology, potentially reducing human bias and improving the collection efficiency of research data. Although LLMs show promise in health care applications, few studies on their practical efficacy compared with that of traditional, manual processing by physicians have been published, particularly in complex areas such as the extraction of surgical oncology data [<xref ref-type="bibr" rid="ref4">4</xref>].</p></sec><sec id="s1-2"><title>Clinical Challenges in Cancer Data Curation</title><p>In the field of breast cancer surgery, retrospective data analysis presents unique challenges owing to the complexity of unstructured clinical data. Electronic health records (EHRs) contain diverse information across clinical charts, operation records, and pathology reports, often in a free-text format. 
The complexity is compounded by breast cancer&#x2013;specific characteristics, including the bilateral nature of the organs, concurrent malignant and benign lesions, and multiple radiological features. This complicates automated data curation and has traditionally necessitated manual review by physicians for accurate data interpretation and collection.</p><p>However, this manual approach has several limitations. As the volume of clinical data increases, consistency in physician reviews becomes increasingly difficult to maintain, potentially leading to discrepancies in data interpretation [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. The time-intensive nature of manual review and the risk of errors in the processing of large volumes of clinical data present considerable challenges in retrospective research [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. In addition, direct EHR access for manual data extraction raises privacy concerns when sensitive patient information is handled [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Although LLM-based automation of information extraction from anonymized EHR data may address these challenges, its effectiveness compared with that of traditional physician reviews remains to be evaluated.</p></sec><sec id="s1-3"><title>Study Objectives</title><p>Although LLMs have been validated for the extraction of specific medical data, their potential for the curation of comprehensive data of patients with cancer remains largely unexplored [<xref ref-type="bibr" rid="ref14">14</xref>]. In this study, we compared traditional physician reviews with LLM-based processing of anonymized clinical data in the field of breast cancer, focusing on the development of a practical approach for surgical oncologists. 
We hypothesized that LLM-based analysis would yield comparable results to manual review in handling large volumes of clinical data while reducing processing time and resource use.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design and Data Collection</title><p>This retrospective comparative study included patients with breast cancer who underwent surgery at 5 academic hospitals from January 1, 2019, to December 31, 2019. This study was designed to compare 2 practical data curation pathways available in real-world clinical research settings. The manual review pathway represents the traditional method of direct EHR access. The LLM processing pathway uses preextracted, deidentified data. This study adheres to the Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis&#x2013;Large Language Model (TRIPOD-LLM) reporting guidelines (<xref ref-type="supplementary-material" rid="app6">Checklist 1</xref>).</p><p>We compared 2 data extraction methods: manual physician review (group 1) and LLM-based processing (group 2). In group 1, 1 dedicated breast-surgical oncologist from each hospital reviewed data spanning 2 years (2019&#x2010;2020) over 7 months (May 2021&#x2013;November 2021) using a standardized data collection form (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The data encompassed 89 clinical variables across 3 domains: patient demographics (basic information, medical history, and family history), treatment information (surgical details, neoadjuvant or adjuvant therapy, complications, and follow-up treatment), and pathological information (tumor characteristics, tumor stage, biomarker status, and margin status). 
Follow-up observations regarding recurrence and mortality were updated until January 2024.</p><p>Patients in group 2 were initially identified using the clinical data warehouse (CDW) of Catholic Medical Center, an integrated data platform of 8 affiliated academic hospitals in Korea [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. The CDW supports this research by providing anonymous clinical data to investigators following institutional review board approval [<xref ref-type="bibr" rid="ref15">15</xref>]. The LLM structured 31 clinical factors from the raw data, including patient demographics (basic information, survival data, and diagnostic data), treatment information (surgery types and neoadjuvant or adjuvant therapy), pathological information (tumor characteristics, tumor stage, biomarker status, and nodal status), and imaging features. Data extraction and curation were performed from October 20, 2024, to November 1, 2024.</p><p>The CDW query identified 17,317 patients diagnosed with invasive breast cancer or ductal carcinoma in situ between July 2018 and July 2021. From this cohort, we selected patients diagnosed during the study period (January 2019-December 2019) who underwent breast cancer surgery. CDW extraction included unstructured EHR reports containing clinical information, operation records, and pathology reports (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref><xref ref-type="supplementary-material" rid="app3"/>-<xref ref-type="supplementary-material" rid="app4">4</xref>). 
Follow-up data through October 31, 2023, were used.</p></sec><sec id="s2-2"><title>Data Curation in LLM-Processing Group</title><p>Unstructured data extracted from the CDW were processed using Claude 3.5 Sonnet (Anthropic) to extract and structure the required factors into predefined categories.</p><sec id="s2-2-1"><title>LLM Implementation and Application Programming Interface (API) Access</title><p>We accessed Claude 3.5 Sonnet through the Anthropic web interface (claude.ai) using a professional subscription account. The implementation specifications are as follows:</p><list list-type="order"><list-item><p>Access method: web-based interface with manual copy-paste of clinical documents.</p></list-item><list-item><p>Processing approach: sequential processing of individual patient records.</p></list-item><list-item><p>Input size limitations: documents exceeding 100,000 characters were split into logical sections (diagnosis, surgery, and pathology) and processed sequentially.</p></list-item><list-item><p>Output format: structured CSV format directly generated by the LLM.</p></list-item><list-item><p>Session management: new conversation initiated for each batch of 50 patients to prevent context contamination.</p></list-item><list-item><p>Quality control: real-time review of outputs with immediate reprocessing for any parsing errors.</p></list-item></list><p>No API programming or authentication keys were required, as we used the standard web interface. This approach, while manual, ensured direct oversight of the extraction process and immediate error detection.</p></sec><sec id="s2-2-2"><title>Prompt Development Process</title><sec id="s2-2-2-1"><title>Overview</title><p>The LLM prompt was developed through a 3-phase iterative process over 2 days (from October 20, 2024, to October 21, 2024). Rather than manually crafting the extraction rules, we used an interactive dialogue approach with the LLM itself to develop the prompt. 
We provided the LLM with sample data and target output requirements, and then iteratively refined the extraction protocol through conversational feedback.</p><p>The iterative refinement process consisted of 3 phases.</p></sec><sec id="s2-2-2-2"><title>Phase 1</title><p>This was the initial framework development phase, and we included 10 cases. We presented the LLM with representative raw data and developed extraction rules through dialogue. The LLM proposed initial patterns for data extraction, which were tested using sample cases.</p></sec><sec id="s2-2-2-3"><title>Phase 2</title><p>This was the rule refinement phase, and we included 20 cases. On the basis of the phase 1 outputs, we organized errors through manual review and engaged the LLM to analyze the errors and modify the extraction rules accordingly. Key refinements included diagnosis deduplication using International Classification of Diseases, 10th Revision code comparison, surgical procedure hierarchy establishment, pathology section prioritization, and biomarker interpretation standardization (particularly for human epidermal growth factor receptor 2 [HER2] status requiring in situ hybridization confirmation for 2+ cases).</p></sec><sec id="s2-2-2-4"><title>Phase 3</title><p>This was the edge case handling phase, and we included 30 cases. The refined prompt was tested in diverse clinical scenarios. Additional instructions were added for handling bilateral cases (processing each breast separately), multiple surgical procedures (capturing all relevant operations), ambiguous staging information (requiring explicit notation rather than inference), and complex biomarker patterns (particularly HER2 equivocal cases).</p><p>The final prompt is available in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. 
Due to its length, we provide a condensed version highlighting the key extraction rules, while the complete prompt with all edge cases and examples can be obtained from the corresponding author.</p></sec></sec></sec><sec id="s2-3"><title>Prompt Structure and Components</title><p>The final prompt focused on accurate extraction from 4 distinct data categories: diagnostic information, clinical measurements, surgical procedures, and pathological findings.</p><sec id="s2-3-1"><title>Global Processing Rules Module</title><p>This module established standardized data formats and processing conventions. Key specifications included patient ID formatting (R[9-digit number]), laterality coding (right or left or bilateral), date standardization (YYYY-MM-DD), missing data coding (999,999), and CSV output structure. Each case was processed by laterality to handle bilateral breast cancers as separate entities, maintaining distinct Case_IDs formatted as &#x201C;Patient_ID_Laterality&#x201D; (eg, R000000001_RT).</p></sec><sec id="s2-3-2"><title>Clinical Data Processing Module</title><p>This module handled diagnosis information, surgical procedures, and mortality data with specific extraction hierarchies. For diagnosis processing, the system prioritized primary diagnosis information, extracted English text only while removing Korean text, and implemented deduplication logic by comparing International Classification of Diseases, 10th Revision codes (first three characters) to identify identical diagnoses while preserving distinct diagnoses. 
For surgical information, we established clear precedence rules where therapeutic operations took priority over diagnostic procedures, with specific terminology mapping for breast procedures (eg, &#x201C;wide excision,&#x201D; &#x201C;modified radical mastectomy,&#x201D; and &#x201C;lumpectomy&#x201D;) and axillary procedures (sentinel lymph node biopsy [SLNB] and axillary lymph node dissection [ALND]).</p></sec><sec id="s2-3-3"><title>Pathology Data Extraction Module</title><p>Given the variability in pathology report formats, we defined a section priority hierarchy: (1) microscopic description, (2) diagnosis, (3) immunohistochemistry, and (4) gross description. Specific extraction patterns were defined within each section. For tumor size determination, the search sequence was: &#x201C;tumor size (size of largest invasive carcinoma),&#x201D; &#x201C;greatest dimension of largest invasive focus,&#x201D; &#x201C;size of largest invasive focus,&#x201D; and &#x201C;estimated size (extent) of DCIS.&#x201D; For lymph node assessment, we standardized various reporting formats (eg, &#x201C;X/Y [positive/total],&#x201D; &#x201C;metastatic carcinoma [n/total],&#x201D; &#x201C;lymph node metastasis; present or absent [n/total]&#x201D;) and classified metastasis by size (macrometastasis &#x003E;2 mm, micrometastasis 0.2&#x2010;2 mm, and isolated tumor cell &#x2264;0.2 mm).</p></sec><sec id="s2-3-4"><title>Quality Control Module</title><p>This module implemented validation rules and error prevention strategies. The prompt excluded error-prone extractions, such as gross specimen sizes, surgical margin measurements, and lymph node sizes. 
It required explicit documentation of missing data rather than inference, implemented range validation for biomarkers (estrogen receptor or progesterone receptor or Allred score 0&#x2010;8, Ki-67 0%&#x2010;100%), and cross-reference verification between different report sections.</p></sec></sec><sec id="s2-4"><title>Task Sequence and Processing Flow</title><p>The prompt executed tasks in a specific sequence to ensure data integrity:</p><list list-type="order"><list-item><p>Initial parsing: identify patient ID and laterality from diagnostic records</p></list-item><list-item><p>Temporal alignment: establish diagnosis date as a reference point for all subsequent data</p></list-item><list-item><p>Hierarchical extraction: process data in order of clinical importance: diagnosis &#x2192; surgery &#x2192; pathology &#x2192; imaging</p></list-item><list-item><p>Integration check: validate consistency across different data sources</p></list-item><list-item><p>Output generation: structure extracted data into a predefined .CSV format with quality flags</p></list-item></list><p>The prompt instructed the LLM to generate outputs directly in .CSV format with predefined column structures, automated date formatting (YYYY-MM-DD), and standardized missing data codes (999,999), eliminating the need for extensive postprocessing. The structured output was validated through the methodology described in the Data Quality Assessment and Validation section.</p></sec><sec id="s2-5"><title>Objectives and Statistical Analysis</title><p>This study aimed to assess the feasibility of replacing manual physician reviews with LLM-based processing of breast cancer&#x2013;related clinical data. 
We compared the demographic characteristics, clinical parameters, treatment patterns, disease characteristics, and survival outcomes between the 2 groups.</p><p>Categorical variables were compared using chi-square or Fisher exact tests, with agreement assessed using Cohen &#x03BA; coefficient (&#x03BA;&#x003C;0.20=poor, 0.21&#x2010;0.40=fair, 0.41&#x2010;0.60=moderate, 0.61&#x2010;0.80=good, &#x003E;0.80=very good). Continuous variables were analyzed using the Student <italic>t</italic> test and the intraclass correlation coefficient. Effect sizes were calculated using Cohen <italic>d</italic> (continuous) and Cram&#x00E9;r V (categorical).</p><p>Overall survival was analyzed using the Kaplan-Meier method and compared using the log-rank test. Both approaches were validated using the Korean Breast Cancer Society (KBCS) 2019 national registry data by comparing age, tumor stage, surgical procedures, molecular subtypes, and survival trends [<xref ref-type="bibr" rid="ref17">17</xref>].</p></sec><sec id="s2-6"><title>Data Quality Assessment and Validation</title><p>For validation, 50 cases from each group were selected using proportionate stratified random sampling. Stratification was based on the cancer stage (0-IV) and type of surgical intervention (breast-conserving surgery vs mastectomy) to ensure representative sampling across key clinical categories. Random selection was performed using Python (version 3.8; Python Software Foundation) with the <italic>NumPy</italic> (v.1.21.0) and <italic>pandas</italic> (v.1.3.0) libraries and a fixed random seed of 20241201 for reproducibility. Four breast-surgical oncologists (SJO, JPY, HK, and SL) independently evaluated 18 predefined clinical factors in each case (900 data points per group). Accuracy rates were calculated as the percentage of correctly extracted factors relative to the total number of factors. 
A dual-reference validation approach was implemented: group 1 was validated against the EHR, whereas group 2 was compared to the CDW raw data. The evaluation included both present and missing values. The accuracy threshold was set at 90% based on previous validation studies of clinical data extraction systems [<xref ref-type="bibr" rid="ref18">18</xref>].</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study was approved by the institutional review board of the Catholic Medical Center (approval: OC24WIDI0138). As this was a retrospective analysis of existing clinical data, the requirement for informed consent was waived by the institutional review board. All patient data were deidentified prior to analysis, with personal identifiers replaced by anonymized codes. The CDW platform ensures privacy protection through automated deidentification. No compensation was provided to participants, as this study involved retrospective data analysis only.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Outcomes</title><p>For comparative analysis, 18 key clinical factors were selected from both groups (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The manual review (group 1) and LLM processing (group 2) groups comprised 1366 and 1734 cases, respectively. Although both groups completely captured age data, they exhibited different patterns of missing data for the other parameters. Group 2 had higher missing rates in terms of cancer stage (12.2% vs 3.1%) and HER2 status (15.1% vs 11.0%), whereas group 1 had more missing data for lesion size (20.5% vs 5.9%) and lymph node assessment (21.5% vs 8.8%). Both groups maintained high documentation rates (&#x003E;90%) for hormone receptor status (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><p>The validation analysis encompassed 1800 data points (900 per group) across clinical factors. Group 1 demonstrated perfect accuracy with no discrepancies. 
Group 2 exhibited 83 discordant factors out of 900 data points, with an accuracy rate of 90.8% (817/900). Among 1734 patients in group 2, 260 (15%) underwent multiple surgical procedures. The LLM successfully integrated data from sequential operations in 53% (138/260) of these cases, while missing data integration in 47% (122/260).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Study design chart showing the comparison between the manual review and large language model (LLM)-processing groups. EHR: electronic health record.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73605_fig01.png"/></fig><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Comparison of missing data rates (%) between manual review (group 1) and large language model (LLM) processing (group 2). Color intensity represents the magnitude of missing data, with darker shades indicating higher missing rates. HER2: human epidermal growth factor receptor 2.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e73605_fig02.png"/></fig></sec><sec id="s3-2"><title>Processing Time and Resource Use</title><p>LLM-based processing demonstrated efficiency gains compared to manual review (<xref ref-type="table" rid="table1">Table 1</xref>). Group 1 required 7 months (May 2021-November 2021) with 5 dedicated breast surgical oncologists. 
In contrast, group 2 processing was completed in 12 days (from October 20, 2024, to November 1, 2024) by 2 physicians, with data extraction taking 10 days and validation requiring 2 additional days.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of processing efficiency.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Parameter</td><td align="left" valign="bottom">Manual review (group 1)</td><td align="left" valign="bottom">LLM<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> processing (group 2)</td><td align="left" valign="bottom">Difference</td></tr></thead><tbody><tr><td align="left" valign="top">Total cases</td><td align="left" valign="top">1366</td><td align="left" valign="top">1734</td><td align="left" valign="top">+368</td></tr><tr><td align="left" valign="top">Processing period</td><td align="left" valign="top">7 months</td><td align="left" valign="top">12 days</td><td align="left" valign="top">&#x2212;95%</td></tr><tr><td align="left" valign="top">Number of physicians</td><td align="left" valign="top">5</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2212;60%</td></tr><tr><td align="left" valign="top">Total physician hours</td><td align="left" valign="top">Approximately 1025</td><td align="left" valign="top">Approximately 96</td><td align="left" valign="top">&#x2212;91%</td></tr><tr><td align="left" valign="top">Direct EHR<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> access required</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">N/A<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td></tr><tr><td align="left" valign="top">API<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup> cost</td><td align="left" valign="top">N/A<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">Approximately 
$260</td><td align="left" valign="top">N/A</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>LLM: large language model.</p></fn><fn id="table1fn2"><p><sup>b</sup>EHR: electronic health record.</p></fn><fn id="table1fn3"><p><sup>c</sup>API: application programming interface.</p></fn><fn id="table1fn4"><p><sup>d</sup>N/A: not applicable.</p></fn></table-wrap-foot></table-wrap><p>Manual review required approximately 1025 physician hours total across 7 months. LLM processing required approximately 96 physician hours in total over 12 days, a 91% reduction in time investment.</p><p>Resource use differed substantially between the methods. The manual review required 5 breast surgical oncologists with a direct EHR access infrastructure and dedicated data entry personnel. LLM processing required 2 physicians using a standard workstation without EHR access. The API cost for LLM processing was approximately $0.15 per case, totaling $260 for 1734 cases. Although direct personnel costs were not calculated due to institutional variations in physician compensation, the 91% reduction in physician hours represents substantial resource savings.</p></sec><sec id="s3-3"><title>Demographics and Clinical Characteristics</title><p>The baseline characteristics of both groups are summarized in <xref ref-type="table" rid="table2">Table 2</xref>. The mean age differed slightly between groups 1 and 2 (55, SD 11.5 vs 53.5, SD 11.4 y; <italic>P</italic>&#x003C;.001; Cohen <italic>d</italic>=0.13). For breast surgery, total mastectomy was performed in 19.2% of cases in group 1 and 26.5% in group 2, while nipple (skin)-sparing mastectomy rates were 15.7% and 9.6%, respectively (<italic>&#x03C7;</italic><sup>2</sup><sub>3</sub>=164.3; <italic>P</italic>&#x003C;.001; Cram&#x00E9;r V=0.29). 
When these procedures were combined, groups 1 and 2 exhibited similar proportions of breast-conserving surgery (63.5% vs 63.9%) and mastectomy (34.8% vs 36%), with a small effect size (Cram&#x00E9;r V=0.10).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Baseline characteristics of study groups. Statistical significance set at <italic>P</italic>&#x003C;.05. Analyses were performed using chi-square test for categorical variables and <italic>t</italic> test for continuous variables<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics</td><td align="left" valign="bottom">Manual review (n=1366)</td><td align="left" valign="bottom">LLM<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> processing (n=1734)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Demographics, mean (SD)</td></tr><tr><td align="left" valign="top">&#x2003;Age (y)</td><td align="left" valign="top">55.0 (11.5)</td><td align="left" valign="top">53.5 (11.4)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="4">Surgical procedures, n</td></tr><tr><td align="left" valign="top" colspan="3">&#x2003;Breast operation</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Breast-conserving surgery</td><td align="left" valign="top">63.53 (868/1366)</td><td align="left" valign="top">63.90 (949/1485)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Total mastectomy</td><td align="left" valign="top">19.18 (262/1366)</td><td align="left" valign="top">26.46 (393/1485)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;N(S)SM<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td 
align="left" valign="top">15.67 (214/1366)</td><td align="left" valign="top">9.56 (142/1485)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Other procedures</td><td align="left" valign="top">1.61 (22/1366)</td><td align="left" valign="top">0.07 (1/1485)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Combined mastectomy<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">34.85 (476/1366)</td><td align="left" valign="top">36.03 (535/1485)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="3">&#x2003;Axillary surgery</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;No surgery</td><td align="left" valign="top">11.20 (153/1366)</td><td align="left" valign="top">19.37 (321/1657)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;SLNB<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">68.30 (933/1366)</td><td align="left" valign="top">59.75 (990/1657)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;ALND<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="top">20.50 (280/1366)</td><td align="left" valign="top">20.88 (346/1657)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Pathological results, mean (SD)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Tumor size (mm)</td><td align="left" valign="top">20.5 (16.4)</td><td align="left" valign="top">21.5 (16.9)</td><td align="left" valign="top">.156</td></tr><tr><td align="left" valign="top" colspan="4">&#x2003;Lymph node status, mean (SD)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Harvested nodes</td><td 
align="left" valign="top">7.79 (7.0)</td><td align="left" valign="top">7.11 (7.2)</td><td align="left" valign="top">.016</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Metastatic nodes</td><td align="left" valign="top">0.95 (3.0)</td><td align="left" valign="top">0.98 (3.2)</td><td align="left" valign="top">.802</td></tr><tr><td align="left" valign="top" colspan="3">&#x2003;Stage distribution, n (%)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;0</td><td align="left" valign="top">17.91 (237/1323)</td><td align="left" valign="top">15.56 (237/1523)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;IA</td><td align="left" valign="top">35.68 (472/1323)</td><td align="left" valign="top">43.27 (659/1523)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;IB</td><td align="left" valign="top">0.60 (8/1323)</td><td align="left" valign="top">1.31 (20/1523)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;IIA</td><td align="left" valign="top">22.22 (294/1323)</td><td align="left" valign="top">20.75 (316/1523)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;IIB</td><td align="left" valign="top">11.11 (147/1323)</td><td align="left" valign="top">10.83 (165/1523)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;IIIA</td><td align="left" valign="top">7.48 (99/1323)</td><td align="left" valign="top">5.65 (86/1523)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;IIIB</td><td align="left" valign="top">0.83 (11/1323)</td><td align="left" valign="top">0.07 (1/1523)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;IIIC</td><td align="left" valign="top">3.70 (49/1323)</td><td align="left" valign="top">2.56 (39/1523)</td><td 
align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;IV</td><td align="left" valign="top">0.45 (6/1323)</td><td align="left" valign="top">0.00 (0/1523)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">&#x2003;Biomarker status</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;ER<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup> positive, % (n/N)</td><td align="left" valign="top">78.28 (1012/1293)</td><td align="left" valign="top">76.21 (1198/1572)</td><td align="left" valign="top">.172</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;PR<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup> positive, % (n/N)</td><td align="left" valign="top">68.68 (886/1290)</td><td align="left" valign="top">67.50 (1059/1569)</td><td align="left" valign="top">.525</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;HER2<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup> positive, % (n/N)</td><td align="left" valign="top">20.49 (249/1215)</td><td align="left" valign="top">20.20 (298/1475)</td><td align="left" valign="top">.003</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Ki-67, mean (SD)</td><td align="left" valign="top">25.4 (23.0)</td><td align="left" valign="top">26.6 (22.6)</td><td align="left" valign="top">.204</td></tr><tr><td align="left" valign="top" colspan="3">&#x2003;Histologic grade, % (n/N)</td><td align="left" valign="top">.764</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Grade 1</td><td align="left" valign="top">22.81 (211/925)</td><td align="left" valign="top">21.54 (283/1314)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Grade 2</td><td align="left" valign="top">44.65 (413/925)</td><td align="left" valign="top">46.35 (609/1314)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Grade 3</td><td align="left" valign="top">32.54 
(301/925)</td><td align="left" valign="top">32.12 (422/1314)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Nuclear grade, % (n/N)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Grade 1</td><td align="left" valign="top">16.75 (171/1021)</td><td align="left" valign="top">12.90 (199/1543)</td><td align="left" valign="top" rowspan="3"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Grade 2</td><td align="left" valign="top">42.51 (434/1021)</td><td align="left" valign="top">51.52 (795/1543)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Grade 3</td><td align="left" valign="top">40.78 (416/1021)</td><td align="left" valign="top">35.58 (549/1543)</td></tr><tr><td align="left" valign="top">&#x2003;Survival outcomes, % (n/N)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Death</td><td align="left" valign="top">0.81 (11/1366)</td><td align="left" valign="top">2.42 (42/1734)</td><td align="left" valign="top">.001</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Percentages calculated based on cases with available data for each variable</p></fn><fn id="table2fn2"><p><sup>b</sup>LLM: large language model.</p></fn><fn id="table2fn3"><p><sup>c</sup>N(S)SM: nipple or skin-sparing mastectomy.</p></fn><fn id="table2fn4"><p><sup>d</sup>Combined mastectomy includes total mastectomy and N(S)SM.</p></fn><fn id="table2fn5"><p><sup>e</sup>SLNB: sentinel lymph node biopsy.</p></fn><fn id="table2fn6"><p><sup>f</sup>ALND: axillary lymph node dissection.</p></fn><fn id="table2fn7"><p><sup>g</sup>ER: estrogen receptor.</p></fn><fn id="table2fn8"><p><sup>h</sup>PR: progesterone receptor.</p></fn><fn id="table2fn9"><p><sup>i</sup>HER2: human epidermal growth factor receptor 
2.</p></fn></table-wrap-foot></table-wrap><p>In terms of axillary surgery, group 1 had a higher rate of SLNB than group 2 (68.3% vs 59.7%), whereas the rates of ALND were similar (<italic>&#x03C7;</italic><sup>2</sup><sub>2</sub>=47.2; <italic>P</italic>&#x003C;.001; Cram&#x00E9;r V=0.13). The mean number of harvested lymph nodes was similar between the groups (7.79, SD 7.03 vs 7.11, SD 7.20; <italic>P</italic>=.016).</p><p>The stage distribution differed between groups (<italic>&#x03C7;</italic><sup>2</sup><sub>8</sub>=68.9; <italic>P</italic>&#x003C;.001), but only slightly (Cram&#x00E9;r V=0.16), with group 1 identifying more cancers as advanced. Hormone receptor status was similar between the groups (estrogen receptor: 78.3% vs 76.2%, <italic>P</italic>=.172; progesterone receptor: 68.7% vs 67.5%, <italic>P</italic>=.525). HER2 status differed negligibly, and Ki67 expression was similar between the groups (HER2: <italic>P</italic>=.003, Cram&#x00E9;r V=0.003; Ki67: <italic>P</italic>=.391, Cram&#x00E9;r V was approximately 0.00). Histological grade distributions were similar between the groups (<italic>P</italic>=.764).</p></sec><sec id="s3-4"><title>Interrater Agreement Analysis</title><p>ICC analysis of continuous variables demonstrated consistently low agreement: age (ICC 0.013, 95% CI &#x2013;0.035 to 0.060), tumor size (ICC 0.029, 95% CI &#x2013;0.021 to 0.078), number of metastatic lymph nodes (ICC 0.031, 95% CI &#x2013;0.019 to 0.081), number of harvested lymph nodes (ICC 0.025, 95% CI &#x2013;0.025 to 0.075), and Ki67 expression (ICC 0.027, 95% CI &#x2013;0.023 to 0.077). All ICC values were negligible, and all CIs included zero.</p></sec><sec id="s3-5"><title>Survival Outcomes</title><p>Survival analysis revealed significant differences between the groups (hazard ratio 2.917, 95% CI 1.496 to 5.688; <italic>P</italic>=.002). Group 2 captured more events than group 1 (41 vs 11). 
The proportional hazards assumption was met (<italic>&#x03C7;</italic><sup>2</sup><sub>1</sub>=2.37; <italic>P</italic>=.120), and the log-rank test confirmed a difference in survival distributions (<italic>&#x03C7;</italic><sup>2</sup><sub>1</sub>=10.9; <italic>P</italic>=.001).</p></sec><sec id="s3-6"><title>Comparison With National Registry Data</title><p>Comparison with the KBCS 2019 registry data (N=9447) revealed small differences in breast surgery patterns for both groups (Cram&#x00E9;r V=0.03&#x2010;0.04; <italic>P</italic>&#x2264;.018; <xref ref-type="table" rid="table3">Table 3</xref>). For axillary surgery, both groups had lower SLNB rates (group 1: 68.30% and group 2: 59.75% vs KBCS: 73.18%) and similar ALND rates (20.50% vs 20.88% vs 18.60%). Group 2 had a higher rate of no axillary surgery than group 1 (19.37% vs 11.20%).</p><p>Stage distribution analysis revealed significant but small differences from the national data (group 1: Cram&#x00E9;r V=0.076, <italic>P</italic>&#x003C;.001; group 2: Cram&#x00E9;r V=0.038, <italic>P</italic>=.003). 
Regarding biomarker subtypes, both groups had slightly higher proportions of hormone receptor&#x2013;positive with HER2-negative (group 1: 67.02% and group 2: 66.57% vs KBCS: 63.14%) and triple-negative cases (12.70% and 13.59% vs 11.98%) with minimal effect sizes (Cram&#x00E9;r V=0.03&#x2010;0.04).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Comparison of clinical characteristics with Korean Breast Cancer Society (KBCS) 2019 national registry data<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristic</td><td align="left" valign="bottom">Group 1 (n=1366)</td><td align="left" valign="bottom">Group 2 (n=1734)</td><td align="left" valign="bottom">KBCS 2019 (N=9447)</td><td align="left" valign="bottom">Effect size (Cram&#x00E9;r's V)</td><td align="left" valign="bottom">P value<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Breast surgery type, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="hyphen" valign="top"><list list-type="bullet"><list-item><p>0.03-0.04</p></list-item></list></td><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Breast-conserving surgery</td><td align="char" char="." valign="top">868 (63.53)</td><td align="char" char="." valign="top">949 (63.90)</td><td align="char" char="." valign="top">6067 (64.26)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total mastectomy</td><td align="char" char="." 
valign="top">476 (34.85)</td><td align="char" char="." valign="top">535 (36.03)</td><td align="char" char="." valign="top">3380 (35.78)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Others</td><td align="char" char="." valign="top">22 (1.61)</td><td align="char" char="." valign="top">1 (0.07)</td><td align="char" char="." valign="top">0 (0.00)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Axillary surgery type, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Group 1: 0.13</p></list-item><list-item><p>Group 2: 0.15</p></list-item></list></td><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SLNB<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="char" char="." valign="top">933 (68.30)</td><td align="char" char="." valign="top">990 (59.75)</td><td align="char" char="." valign="top">6913 (73.18)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ALND<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="char" char="." valign="top">280 (20.50)</td><td align="char" char="." valign="top">346 (20.88)</td><td align="char" char="." 
valign="top">1757 (18.60)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No surgery</td><td align="char" char="." valign="top">153 (11.20)</td><td align="char" char="." valign="top">321 (19.37)</td><td align="char" char="." valign="top">777 (8.22)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Cancer stage distribution, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Group 1: 0.07</p></list-item><list-item><p>Group 2: 0.03</p></list-item></list></td><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001</p></list-item><list-item><p>0.003</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stage 0</td><td align="char" char="." valign="top">237 (17.91)</td><td align="char" char="." valign="top">237 (15.56)</td><td align="char" char="." valign="top">1588 (16.81)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stage I</td><td align="char" char="." valign="top">480 (36.28)</td><td align="char" char="." valign="top">679 (44.58)</td><td align="char" char="." valign="top">4015 (42.50)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stage II</td><td align="char" char="." valign="top">441 (33.33)</td><td align="char" char="." valign="top">481 (31.58)</td><td align="char" char="." 
valign="top">2948 (31.20)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stage III</td><td align="char" char="." valign="top">159 (12.02)</td><td align="char" char="." valign="top">126 (8.27)</td><td align="char" char="." valign="top">896 (9.48)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Biomarker status,<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup> n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="hyphen" valign="top"><list list-type="bullet"><list-item><p>0.03-0.04</p></list-item></list></td><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>0.003</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ER<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> positive</td><td align="char" char="." valign="top">1012 (78.28)</td><td align="char" char="." valign="top">1198 (76.21)</td><td align="char" char="." valign="top">7163 (75.82)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PR<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup> positive</td><td align="char" char="." valign="top">886 (68.68)</td><td align="char" char="." valign="top">1059 (67.50)</td><td align="char" char="." valign="top">6254 (66.20)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>HER2<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup> positive</td><td align="char" char="." 
valign="top">249 (20.49)</td><td align="char" char="." valign="top">298 (20.20)</td><td align="char" char="." valign="top">1748 (18.50)</td><td align="left" valign="top"/><td align="char" char="." valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Percentages calculated based on cases with available data for each variable</p></fn><fn id="table3fn2"><p><sup>b</sup>Statistical significance set at <italic>P</italic>&#x003C;.05. Chi-square tests were used for categorical comparisons.</p></fn><fn id="table3fn3"><p><sup>c</sup>KBCS: Korean Breast Cancer Society</p></fn><fn id="table3fn4"><p><sup>d</sup>SLNB: sentinel lymph node biopsy.</p></fn><fn id="table3fn5"><p><sup>e</sup>ALND: axillary lymph node dissection.</p></fn><fn id="table3fn6"><p><sup>f</sup>Molecular subtypes were determined based on combined estrogen receptor (ER), progesterone receptor (PR), and HER2 status. HR+ defined as ER+ or PR+.</p></fn><fn id="table3fn7"><p><sup>g</sup>ER: estrogen receptor.</p></fn><fn id="table3fn8"><p><sup>h</sup>PR: progesterone receptor.</p></fn><fn id="table3fn9"><p><sup>i</sup>HER2: human epidermal growth factor receptor 2.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This comparative study demonstrated that LLM-based processing achieved 90.8% accuracy in extracting clinical data from breast cancer records, with significant reductions in processing time (12 d vs 7 mo) and resource requirements (2 vs 5 physicians). The LLM approach captured substantially more survival events (41 vs 11; <italic>P</italic>=.002) and showed better documentation of lymph node assessment (91.2% vs 78.5%), although it had higher missing rates for integrated assessments such as cancer staging (12.2% vs 3.1%). 
Both methods yielded similar patterns in key clinical parameters, including breast-conserving surgery rates (63.5% vs 63.9%) and biomarker distributions.</p><p>A particular finding was the substantial difference in captured survival events between LLM processing (41 events) and manual review (11 events). This discrepancy may reflect fundamental differences in how humans and LLMs approach large-scale data extraction. Manual reviewers processing many charts may inadvertently adopt a mechanical approach, focusing on the most obvious data fields, while potentially overlooking mortality information scattered across multiple sections of the medical record. In contrast, LLM maintained consistent thoroughness throughout the extraction process, systematically examining all available data sources for each case without the cognitive fatigue that affects human reviewers during repetitive tasks.</p><p>This finding challenges the assumption that manual review is the gold standard for all types of clinical data extraction. While human expertise remains essential for complex clinical interpretation, our results suggest that LLM processing may provide a more complete capture of certain objective outcomes, particularly those requiring synthesis across multiple data fields. However, this interpretation requires further research to confirm whether the additional events captured by the LLM represent true positives or extraction errors.</p></sec><sec id="s4-2"><title>Comparison With Prior Work</title><p>Although LLMs have shown favorable results in extracting specific medical data from radiology and pathology reports [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>], our study represents the first comprehensive evaluation of surgical oncology data curation. Previous studies have focused on the extraction of single data points or specific types of reports. 
For instance, Park et al [<xref ref-type="bibr" rid="ref20">20</xref>] demonstrated the effectiveness of LLM in extracting pulmonary disease information from radiology reports, whereas Cheng [<xref ref-type="bibr" rid="ref22">22</xref>] reviewed its applications in pathology. Our study extends these findings by demonstrating that LLMs can handle the complex integration required for comprehensive oncological data including surgical procedures, pathological findings, and survival outcomes.</p><p>The observed statistical differences between groups, while significant, were mostly clinically negligible (Cram&#x00E9;r V&#x003C;0.30 for all comparisons), suggesting that LLM processing maintains clinical validity. The observed accuracy of 90.8% is comparable to recent studies, where LLMs achieved similar performance in extracting structured information from clinical notes, including the social determinants of health [<xref ref-type="bibr" rid="ref23">23</xref>]. Although comprehensive systematic reviews of LLM applications in healthcare are still emerging [<xref ref-type="bibr" rid="ref3">3</xref>], individual studies have consistently demonstrated their potential for automated clinical data extraction. However, unlike previous studies that focused on extracting discrete clinical variables or single-domain information [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref23">23</xref>], surgical oncology data require integration of multiple interconnected factors, presenting unique challenges in automated extraction.</p></sec><sec id="s4-3"><title>Strengths and Limitations</title><p>Our study had several strengths. The large sample size provided robust statistical power. The use of real-world clinical data from multiple institutions enhanced the generalizability of the results. The head-to-head comparison with a manual physician review rather than against a reference standard alone provides practical insights for clinical research applications. 
Additionally, the demonstrated efficiency gains (12 d vs 7 mo) highlight the potential for scaling oncological research.</p><p>However, several limitations of this study must be acknowledged.</p><p>First, LLMs showed limitations in integrating multiple clinical events. Although the model performed well in extracting explicit data points, it struggled to synthesize information across sequential surgical procedures. This was evidenced by the higher rate of missing surgical data (12.2% vs 3.1%) in patients who underwent multiple operations. Manual reviewers could identify and integrate multiple surgical steps, such as lymph node assessment after initial diagnostic excision, while the LLM typically captured data from a single representative operation.</p><p>Second, the validation sample size of 50 cases per group represented only 2.9% and 2.6% of the respective cohorts. While this sample was stratified to ensure representation across cancer stages and surgical types and achieved adequate power for detecting clinically meaningful differences, a larger validation set would strengthen confidence in the accuracy estimates.</p><p>Third, differences in data sources between the groups may have affected direct comparability. The manual review group accessed complete EHRs directly, whereas the LLM group processed CDW-extracted data. This methodological design was unavoidable; manual reviewers needed direct EHR access for a comprehensive review, while the LLM required deidentified extracted data for processing. However, this means we cannot isolate whether performance differences stemmed from the extraction approach itself or from the inherent differences in available data. Future studies comparing both methods using identical raw data would overcome this limitation.</p><p>Fourth, generalizability to other LLM models requires evaluation. We used Claude 3.5 Sonnet, but the performance may vary across models, versions, and prompting strategies. 
The rapid evolution of LLM capabilities suggests that our findings represent a snapshot of current technology rather than definitive limits.</p></sec><sec id="s4-4"><title>Future Directions</title><p>Future research should address several key issues. The first is the development of improved prompting strategies to handle complex clinical scenarios that require integrated assessment, particularly for sequential surgical procedures and temporal relationships. Second, LLM processing should be compared with manual review using identical raw data sources to isolate the actual performance differences between the methods. Third, feasibility studies of integrated data curation across multiple clinical events are needed to overcome the current limitations in synthesizing longitudinal patient data. Fourth, a systematic examination of the performance characteristics of different LLMs is required to identify optimal models for specific clinical data types.</p></sec><sec id="s4-5"><title>Conclusions</title><p>LLM-based processing demonstrated comparable effectiveness to manual review by physicians for breast cancer clinical data extraction, while significantly reducing processing time and resource utilization. Despite the limitations of integrated assessments requiring synthesis across multiple clinical events, this approach offers a solution for efficient clinical data extraction in oncology research. The ability to process large volumes of data consistently and rapidly while maintaining an accuracy above 90% suggests that LLM-based methods can accelerate retrospective clinical research.</p></sec></sec></body><back><ack><p>This work was supported by the National Research Foundation of Korea grant funded by the Korea government (Ministry of Science and ICT; RS-2025-19643006). This study used the Claude 3.5 Sonnet (Anthropic) web interface (claude.ai) for large language model (LLM)-based data extraction, which did not require programming code. 
The extraction was performed through manual copy-paste operations with the structured prompt provided in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. Statistical analyses were performed using Python (version 3.8; Python Software Foundation) with NumPy (v.1.21.0) and pandas (v.1.3.0) libraries.</p></ack><fn-group><fn fn-type="con"><p>Conceptualization: YJK</p><p>Data curation: YJK, JPY, HK, CIY, JMB, YSK, YWJ, JR</p><p>Formal analysis: YJK, HL</p><p>Funding acquisition:</p><p>Investigation: JPY, HK, SHL, SJO</p><p>Methodology: YJK, HL</p><p>Project administration: YJK</p><p>Resources: CIY, JMB, YSK, YWJ, JR</p><p>Software: YJK</p><p>Supervision: HC, SJO</p><p>Validation: HL, JPY, HK, SHL</p><p>Writing &#x2013; original draft: YJK</p><p>Writing &#x2013; review &#x0026; editing: YJK, HL, HC, SJO</p><p>All authors have read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ALND</term><def><p>axillary lymph node dissection</p></def></def-item><def-item><term id="abb2">API</term><def><p>application programming interface</p></def></def-item><def-item><term id="abb3">CDW</term><def><p>clinical data warehouse</p></def></def-item><def-item><term id="abb4">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb5">HER2</term><def><p>human epidermal growth factor receptor 2</p></def></def-item><def-item><term id="abb6">ICC</term><def><p>intraclass correlation coefficient</p></def></def-item><def-item><term id="abb7">KBCS</term><def><p>Korean Breast Cancer Society</p></def></def-item><def-item><term id="abb8">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb9">SLNB</term><def><p>sentinel lymph node biopsy</p></def></def-item><def-item><term id="abb10">TRIPOD-LLM</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual 
Prognosis or Diagnosis&#x2013;Large Language Model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Baclic</surname><given-names>O</given-names> </name><name name-style="western"><surname>Tunis</surname><given-names>M</given-names> </name><name name-style="western"><surname>Young</surname><given-names>K</given-names> </name><name name-style="western"><surname>Doan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Swerdfeger</surname><given-names>H</given-names> </name><name name-style="western"><surname>Schonfeld</surname><given-names>J</given-names> </name></person-group><article-title>Challenges and opportunities for public health made possible by advances in natural language processing</article-title><source>Can Commun Dis Rep</source><year>2020</year><month>06</month><day>4</day><volume>46</volume><issue>6</issue><fpage>161</fpage><lpage>168</lpage><pub-id pub-id-type="doi">10.14745/ccdr.v46i06a02</pub-id><pub-id pub-id-type="medline">32673380</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Minaee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mikolov</surname><given-names>T</given-names> </name><name name-style="western"><surname>Nikzad</surname><given-names>N</given-names> </name><name name-style="western"><surname>Chenaghlu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Socher</surname><given-names>R</given-names> </name><name name-style="western"><surname>Amatriain</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Large language models: a survey</article-title><source>arXiv</source><comment>Preprint posted online on Feb 9, 
2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2402.06196</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bedi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Orr-Ewing</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Testing and evaluation of health care applications of large language models: a systematic review</article-title><source>JAMA</source><year>2025</year><month>01</month><day>28</day><volume>333</volume><issue>4</issue><fpage>319</fpage><lpage>328</lpage><pub-id pub-id-type="doi">10.1001/jama.2024.21700</pub-id><pub-id pub-id-type="medline">39405325</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stroganov</surname><given-names>O</given-names> </name><name name-style="western"><surname>Schedlbauer</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lorenzen</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Unpacking unstructured data: A pilot study on extracting insights from neuropathological reports of Parkinson&#x2019;s disease patients using large language models</article-title><source>Biol Methods Protoc</source><year>2024</year><volume>9</volume><issue>1</issue><fpage>bpae072</fpage><pub-id pub-id-type="doi">10.1093/biomethods/bpae072</pub-id><pub-id pub-id-type="medline">39464853</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>J</given-names> 
</name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Nakamori</surname><given-names>Y</given-names> </name></person-group><article-title>Big data analytics in healthcare</article-title><source>Knowledge Technology and Systems: Toward Establishing Knowledge Systems Science</source><year>2023</year><publisher-name>Springer Nature Singapore</publisher-name><fpage>27</fpage><lpage>70</lpage><pub-id pub-id-type="doi">10.1007/978-981-99-1075-5_2</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Wani</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Jabin</surname><given-names>S</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Aggarwal</surname><given-names>V</given-names> </name><name name-style="western"><surname>Bhatnagar</surname><given-names>V</given-names> </name><name name-style="western"><surname>Mishra</surname><given-names>D</given-names> </name></person-group><article-title>Big data: issues, challenges, and techniques in business intelligence</article-title><source>Big Data Analytics Advances in Intelligent Systems and Computing</source><year>2018</year><publisher-name>Springer Singapore</publisher-name><pub-id pub-id-type="doi">10.1007/978-981-10-6620-7_59</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garza</surname><given-names>MY</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ounpraseuth</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Error rates of data processing methods in clinical research: a systematic review and meta-analysis of 
manuscripts identified through PubMed</article-title><source>Int J Med Inform</source><year>2025</year><month>03</month><volume>195</volume><issue>105749</issue><fpage>105749</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105749</pub-id><pub-id pub-id-type="medline">39647291</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Krishnan</surname><given-names>E</given-names> </name></person-group><article-title>Big data and clinicians: a review on the state of the science</article-title><source>JMIR Med Inform</source><year>2014</year><month>01</month><day>17</day><volume>2</volume><issue>1</issue><fpage>e1</fpage><pub-id pub-id-type="doi">10.2196/medinform.2913</pub-id><pub-id pub-id-type="medline">25600256</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hoffman</surname><given-names>S</given-names> </name></person-group><article-title>Big data analytics: What can go wrong</article-title><source>Ind Health L Rev</source><year>2018</year><volume>15</volume><fpage>227</fpage><pub-id pub-id-type="doi">10.18060/3911.0048</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Han</surname><given-names>F</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>H</given-names> </name></person-group><article-title>Challenges of big data analysis</article-title><source>Natl Sci Rev</source><year>2014</year><month>06</month><volume>1</volume><issue>2</issue><fpage>293</fpage><lpage>314</lpage><pub-id 
pub-id-type="doi">10.1093/nsr/nwt032</pub-id><pub-id pub-id-type="medline">25419469</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kang</surname><given-names>HJ</given-names> </name><etal/></person-group><article-title>Fine-tuning LLMs with medical data: can safety be ensured?</article-title><source>NEJM AI</source><year>2025</year><month>01</month><volume>2</volume><issue>1</issue><fpage>AIcs2400390</fpage><pub-id pub-id-type="doi">10.1056/AIcs2400390</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rengers</surname><given-names>TA</given-names> </name><name name-style="western"><surname>Thiels</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Salehinejad</surname><given-names>H</given-names> </name></person-group><article-title>Academic surgery in the era of large language models: a review</article-title><source>JAMA Surg</source><year>2024</year><month>04</month><day>1</day><volume>159</volume><issue>4</issue><fpage>445</fpage><lpage>450</lpage><pub-id pub-id-type="doi">10.1001/jamasurg.2023.6496</pub-id><pub-id pub-id-type="medline">38353991</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jeyaraman</surname><given-names>M</given-names> </name><name name-style="western"><surname>Balaji</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jeyaraman</surname><given-names>N</given-names> </name><name 
name-style="western"><surname>Yadav</surname><given-names>S</given-names> </name></person-group><article-title>Unraveling the ethical enigma: artificial intelligence in healthcare</article-title><source>Cureus</source><year>2023</year><month>08</month><volume>15</volume><issue>8</issue><fpage>e43262</fpage><pub-id pub-id-type="doi">10.7759/cureus.43262</pub-id><pub-id pub-id-type="medline">37692617</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wulcan</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Jacques</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>MA</given-names> </name><etal/></person-group><article-title>Classification performance and reproducibility of GPT-4 omni for information extraction from veterinary electronic health records</article-title><source>Front Vet Sci</source><year>2024</year><volume>11</volume><fpage>1490030</fpage><pub-id pub-id-type="doi">10.3389/fvets.2024.1490030</pub-id><pub-id pub-id-type="medline">39885843</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Choi</surname><given-names>IY</given-names> </name><name name-style="western"><surname>Park</surname><given-names>S</given-names> </name><name name-style="western"><surname>Park</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Development of prostate cancer research database with the clinical data warehouse technology for direct linkage with electronic medical record system</article-title><source>Prostate Int</source><year>2013</year><volume>1</volume><issue>2</issue><fpage>59</fpage><lpage>64</lpage><pub-id pub-id-type="doi">10.12954/PI.12015</pub-id><pub-id 
pub-id-type="medline">24223403</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Machine learning prediction of dropping out of outpatients with alcohol use disorders</article-title><source>PLoS One</source><year>2021</year><volume>16</volume><issue>8</issue><fpage>e0255626</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0255626</pub-id><pub-id pub-id-type="medline">34339461</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Choi</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Park</surname><given-names>CS</given-names> </name><etal/></person-group><article-title>Breast cancer statistics in Korea, 2019</article-title><source>J Breast Cancer</source><year>2023</year><month>06</month><volume>26</volume><issue>3</issue><fpage>207</fpage><lpage>220</lpage><pub-id pub-id-type="doi">10.4048/jbc.2023.26.e27</pub-id><pub-id pub-id-type="medline">37387348</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Woodfield</surname><given-names>R</given-names> </name><name name-style="western"><surname>Grant</surname><given-names>I</given-names> </name><collab>UK Biobank Stroke Outcomes Group</collab><collab>UK Biobank Follow-Up and Outcomes Working Group</collab><name 
name-style="western"><surname>Sudlow</surname><given-names>CLM</given-names> </name></person-group><article-title>Accuracy of electronic health record data for identifying stroke cases in large-scale epidemiological studies: a systematic review from the UK Biobank Stroke Outcomes Group</article-title><source>PLoS ONE</source><year>2015</year><volume>10</volume><issue>10</issue><fpage>e0140533</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0140533</pub-id><pub-id pub-id-type="medline">26496350</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>L&#x00F3;pez-&#x00DA;beda</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mart&#x00ED;n-Noguerol</surname><given-names>T</given-names> </name><name name-style="western"><surname>Juluru</surname><given-names>K</given-names> </name><name name-style="western"><surname>Luna</surname><given-names>A</given-names> </name></person-group><article-title>Natural language processing in radiology: update on clinical applications</article-title><source>J Am Coll Radiol</source><year>2022</year><month>11</month><volume>19</volume><issue>11</issue><fpage>1271</fpage><lpage>1285</lpage><pub-id pub-id-type="doi">10.1016/j.jacr.2022.06.016</pub-id><pub-id pub-id-type="medline">36029890</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Huh</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Chae</surname><given-names>G</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>MG</given-names> </name></person-group><article-title>Extraction of clinical data on major pulmonary diseases from unstructured radiologic 
reports using a large language model</article-title><source>PLoS ONE</source><year>2024</year><volume>19</volume><issue>11</issue><fpage>e0314136</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0314136</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Soni</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ora</surname><given-names>M</given-names> </name><name name-style="western"><surname>Agarwal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bathla</surname><given-names>G</given-names> </name></person-group><article-title>A review of the opportunities and challenges with large language models in radiology: the road ahead</article-title><source>AJNR Am J Neuroradiol</source><year>2024</year><month>11</month><day>21</day><fpage>ajnr</fpage><pub-id pub-id-type="doi">10.3174/ajnr.A8589</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheng</surname><given-names>J</given-names> </name></person-group><article-title>Applications of large language models in pathology</article-title><source>Bioengineering (Basel)</source><year>2024</year><month>03</month><day>31</day><volume>11</volume><issue>4</issue><fpage>342</fpage><pub-id pub-id-type="doi">10.3390/bioengineering11040342</pub-id><pub-id pub-id-type="medline">38671764</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gu</surname><given-names>B</given-names> </name><name name-style="western"><surname>Shao</surname><given-names>V</given-names> </name><name 
name-style="western"><surname>Liao</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Scalable information extraction from free text electronic health records using large language models</article-title><source>BMC Med Res Methodol</source><year>2025</year><month>01</month><day>28</day><volume>25</volume><issue>1</issue><fpage>23</fpage><pub-id pub-id-type="doi">10.1186/s12874-025-02470-z</pub-id><pub-id pub-id-type="medline">39871166</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Standardized data collection form used for manual physician review.</p><media xlink:href="medinform_v13i1e73605_app1.png" xlink:title="PNG File, 492 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Example of grouped data sheets extracted from the clinical data warehouse.</p><media xlink:href="medinform_v13i1e73605_app2.png" xlink:title="PNG File, 328 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Pathology reports extracted from the clinical data warehouse.</p><media xlink:href="medinform_v13i1e73605_app3.png" xlink:title="PNG File, 23585 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Radiology reports extracted from the clinical data warehouse.</p><media xlink:href="medinform_v13i1e73605_app4.png" xlink:title="PNG File, 496 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>The condensed prompt.</p><media xlink:href="medinform_v13i1e73605_app5.docx" xlink:title="DOCX File, 24 KB"/></supplementary-material><supplementary-material id="app6"><label>Checklist 1</label><p>TRIPOD-LLM checklist.</p><media xlink:href="medinform_v13i1e73605_app6.pdf" xlink:title="PDF File, 70 KB"/></supplementary-material></app-group></back></article>