<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v14i1e84396</article-id>
      <article-id pub-id-type="pmid">42202288</article-id>
      <article-id pub-id-type="doi">10.2196/84396</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Multimodal Prediction of Renal Tumor Malignancy From Radiology Reports and Structured Electronic Health Records: Retrospective Cohort Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Coristine</surname>
            <given-names>Andrew</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jiao</surname>
            <given-names>Zhicheng</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Akinpeloye</surname>
            <given-names>Olajide</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Fan</surname>
            <given-names>Zhengkang</given-names>
          </name>
          <degrees>MSCS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-5115-5902</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Liang</surname>
            <given-names>Renjie</given-names>
          </name>
          <degrees>MSCS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-2415-3979</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Chengkun</given-names>
          </name>
          <degrees>MSCS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2095-9369</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Pan</surname>
            <given-names>Jinqian</given-names>
          </name>
          <degrees>MSCS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-0695-9896</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Terry</surname>
            <given-names>Russell</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3659-060X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Jie</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Health Outcomes and Biomedical Informatics</institution>
            <institution>College of Medicine</institution>
            <institution>University of Florida</institution>
            <addr-line>1889 Museum Rd #7002</addr-line>
            <addr-line>Gainesville, FL, 32610</addr-line>
            <country>United States</country>
            <phone>1 352 627 9467</phone>
            <email>xujie@ufl.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5291-5198</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Health Outcomes and Biomedical Informatics</institution>
        <institution>College of Medicine</institution>
        <institution>University of Florida</institution>
        <addr-line>Gainesville, FL</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Urology</institution>
        <institution>College of Medicine</institution>
        <institution>University of Florida</institution>
        <addr-line>Gainesville, FL</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jie Xu <email>xujie@ufl.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>27</day>
        <month>5</month>
        <year>2026</year>
      </pub-date>
      <volume>14</volume>
      <elocation-id>e84396</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>10</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>20</day>
          <month>11</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>7</day>
          <month>5</month>
          <year>2026</year>
        </date>
        <date date-type="accepted">
          <day>8</day>
          <month>5</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Zhengkang Fan, Renjie Liang, Chengkun Sun, Jinqian Pan, Russell Terry, Jie Xu. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 27.05.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2026/1/e84396" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Accurate preoperative prediction of renal tumor malignancy is critical for guiding decisions and reducing overtreatment, as a substantial proportion of renal masses prove benign. Although radiology assessments and structured electronic health record (EHR) data are routinely used, many tumor-specific descriptors remain embedded in free-text radiology reports and are underused due to extraction challenges.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to develop and evaluate a multimodal pipeline that integrates structured EHR variables with natural language processing features from computed tomography (CT) radiology reports, including large language model (LLM)–extracted abnormality characteristics and transformer-based report embeddings, to improve malignancy prediction.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We conducted a retrospective cohort study using University of Florida Health Integrated Data Repository Observational Medical Outcomes Partnership–mapped EHR data from December 2011 to August 2024. Adults with renal tumors were included if they had longitudinal diagnostic documentation consistent with a renal mass and at least 1 preoperative renal CT report; final benign or malignant status served as the outcome. Structured features included demographics, comorbidities, medications, vital signs, and laboratory measurements. From the most recent preindex CT report, an on-premises LLM isolated kidney-specific findings and extracted abnormality characteristics. Four locally deployed LLMs were evaluated against manual annotations of 500 reports. Kidney-specific text was encoded using pretrained biomedical transformer models, including radiology Bidirectional Encoder Representations from Transformers (BERT) variants. We evaluated unimodal baselines and multimodal early, middle, and late fusion strategies. Model development used 5-fold cross-validation within the 80% training partition; each fold-specific model was evaluated on the same independent 20% held-out test set, with performance reported as mean and SD across the 5 held-out test evaluations. The primary metric was area under the receiver operating characteristic curve (AUC).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The final cohort included 967 patients (n=712, 73.6% malignant). In extraction evaluation, Qwen2.5-32B achieved 88.3% overall accuracy with a 100% extraction success rate and was selected for downstream feature generation. Among unimodal models, the structured clinical variable model achieved an AUC of 0.758 (SD 0.012), kidney-specific text with radiology BERT achieved an AUC of 0.746 (SD 0.058), and abnormality characteristics alone achieved an AUC of 0.716 (SD 0.015). Multimodal fusion models achieved higher descriptive performance than unimodal models. Early fusion achieved the highest AUC (mean 0.813, SD 0.008) and <italic>F</italic><sub>1</sub>-score (mean 0.809, SD 0.030), while late fusion achieved an AUC of 0.805 (SD 0.016). Ablation and interpretability analyses suggested complementary predictive information from structured clinical variables and kidney-specific text embeddings.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Integrating unstructured radiology report text with structured EHR variables achieved higher mean predictive performance than unimodal approaches in descriptive comparisons. Multimodal fusion, particularly early fusion incorporating radiology BERT–derived kidney-specific text embeddings, achieved the strongest discrimination, suggesting potential value of natural language processing–enabled multimodal EHR pipelines for informing preoperative risk stratification.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>EHR</kwd>
        <kwd>electronic health records</kwd>
        <kwd>large language models</kwd>
        <kwd>LLM</kwd>
        <kwd>machine learning</kwd>
        <kwd>ML</kwd>
        <kwd>multimodal data fusion</kwd>
        <kwd>renal tumor malignancy prediction</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Kidney cancer (KC) is the seventh most common cancer in the United States, with renal cell carcinoma (RCC) accounting for approximately 90% of cases [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. In 2025, a total of 80,980 new RCC cases were estimated, with over 64.7% occurring in males [<xref ref-type="bibr" rid="ref3">3</xref>]. Early-stage KC is often asymptomatic, and more than half of cases are detected incidentally during abdominal imaging [<xref ref-type="bibr" rid="ref4">4</xref>]. While surgical resection, partial or radical nephrectomy, remains the primary treatment, nearly 25% of these small tumors prove to be benign postoperatively [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. These unnecessary surgeries expose patients to potential complications without therapeutic benefit, underscoring the urgent need for improved preoperative risk stratification.</p>
      <p>Cross-sectional imaging, particularly computed tomography (CT), plays a central role in the diagnosis and management of renal masses, and many imaging-based models (eg, radiomics or deep learning) have demonstrated promising predictive capabilities. For example, deep learning models applied to preoperative CT imaging have achieved an area under the receiver operating characteristic curve (AUC) of up to 0.87 in differentiating benign from malignant tumors [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref10">10</xref>], and up to approximately 0.86 in predicting RCC subtypes [<xref ref-type="bibr" rid="ref11">11</xref>]. Prognostic models for Stage, Size, Grade, and Necrosis scores or survival outcomes have also shown concordance indices between 0.75 and 0.84 [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. However, these approaches typically require access to raw imaging data and specialized computational infrastructure, limiting scalability in resource-constrained health care settings.</p>
      <p>In contrast, electronic health records (EHRs) are widely available and contain rich, routinely collected data, such as demographics, comorbidities, medication histories, and clinical notes (eg, pathology reports and radiology reports), which can be harnessed to develop predictive models. Structured EHR data captures known RCC risk factors (eg, BMI, smoking history, cardiovascular disease, and diabetes) [<xref ref-type="bibr" rid="ref14">14</xref>], and has been successfully used in risk stratification for various diseases [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Thus, incorporating EHR data into renal tumor prediction models offers a scalable, cost-effective alternative to image-based approaches.</p>
      <p>While structured EHR data offers important clinical information, approximately 80% of health care information remains unstructured, such as narrative texts, images, and signals [<xref ref-type="bibr" rid="ref17">17</xref>]. Tumor-specific details (eg, size, texture, and imaging descriptors) are often embedded in radiology reports rather than structured fields. In response, researchers have used natural language processing (NLP) techniques to extract granular tumor characteristics from free-text clinical documentation [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Rule-based NLP frameworks have demonstrated efficacy in retrieving tumor-related information from unstructured texts [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]; however, these approaches typically necessitate labor-intensive development, including manual rule crafting and domain-specific engineering, which constrains their scalability and cross-institutional applicability. In contrast, large language models (LLMs), such as GPT-4 [<xref ref-type="bibr" rid="ref22">22</xref>] and LLaMA-2 [<xref ref-type="bibr" rid="ref23">23</xref>], offer improved adaptability and scalability due to their deep contextual understanding acquired through extensive pretraining. Empirical evidence indicates that LLMs can extract structured oncologic information from narrative radiologic and pathologic reports with minimal manual intervention [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      <p>Building upon these advances, recent studies have explored the multimodal integration of structured EHR data with unstructured textual and imaging features to improve predictive modeling, particularly in oncology [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. For example, Xi et al [<xref ref-type="bibr" rid="ref31">31</xref>] developed a deep learning framework that combined routine magnetic resonance imaging with structured clinical variables to differentiate benign from malignant renal lesions. Similarly, Xu et al [<xref ref-type="bibr" rid="ref32">32</xref>] integrated clinical data with radiomic features derived from CT to support tumor classification. In parallel, combining NLP-processed clinical narratives with structured data has been shown to boost predictive performance across diverse medical domains [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>]. Although research specifically targeting renal tumors is still limited, evidence from related applications suggests strong translational potential.</p>
      <p>In this study, we developed a multimodal pipeline that integrates structured EHR data with features extracted from unstructured radiology reports. An LLM was used to extract abnormality descriptors from free-text narratives, and additional engineered features were derived from tabular EHR data. Each information stream, structured clinical variables, kidney-specific findings, and abnormality characteristics, was encoded via a specialized architecture: Bidirectional Encoder Representations from Transformers (BERT) processed the textual kidney-related observations, while a multilayer perceptron (MLP) handled the structured clinical variables and engineered abnormality characteristics. The resulting embeddings were then combined through a fusion strategy designed to evaluate whether multimodal integration could enhance descriptive model performance for renal tumor prediction. Importantly, integrating structured and unstructured data not only enhances model performance but also enables the creation of more comprehensive patient profiles that could support more individualized assessment and better-informed clinical decisions. An overview of the pipeline is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>A.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>(A) Overview of the proposed multimodal predictive pipeline. (B) Workflow for feature extraction from radiology reports. CT: computed tomography; EHR: electronic health record; LLM: large language model.</p>
        </caption>
        <graphic xlink:href="medinform_v14i1e84396_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Sources and Cohort Definition</title>
        <p>Data were obtained from the University of Florida Health Integrated Data Repository, which houses its clinical content in the Observational Medical Outcomes Partnership Common Data Model. The structured dataset included demographics, diagnoses, medications, procedures, and laboratory results, while unstructured data comprised CT radiology report narratives. We retrospectively identified over 60,000 patients with renal-related conditions observed from December 2011 to August 2024. The study cohort included patients who met all of the following criteria: (1) had at least 2 distinct renal tumor diagnoses, recorded at different time points, based on <italic>ICD</italic> (<italic>International Classification of Diseases</italic>) codes, with the earliest diagnosis designated as the index date; (2) experienced at most 1 change in tumor classification (benign to malignant or malignant to benign), with the final status serving as the outcome label (patients with multiple reversals were excluded); and (3) had at least 1 renal CT report dated before the index date. A schematic illustrating the temporal ordering of imaging, diagnosis, outcome labeling, and surgical intervention is provided in Figure S9 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>Outcome Definition and Sensitivity Analyses</title>
        <p>Benign vs malignant tumor status was derived from longitudinal EHR diagnosis codes, as pathology reports were not uniformly available. Patients were classified as benign or malignant if they had at least 2 corresponding <italic>ICD</italic> diagnosis codes recorded on distinct dates, with the final observed status used as the outcome label. To reduce label instability, patients with multiple benign-malignant reversals were excluded, and a 1-direction transition rule was applied.</p>
        <p>To assess the robustness of the <italic>ICD</italic>-based outcome definition, sensitivity analyses were conducted in patient subsets with higher-specificity reference signals. These included (1) a pathology-confirmed subset identified from unstructured surgical pathology reports and (2) a nephrectomy subset, in which renal tumor diagnosis codes recorded after surgery were used as proxy reference labels. These analyses were used to evaluate the consistency of outcome labeling under more stringent definitions.</p>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <p>We integrated structured EHR data with CT radiology report analyses to generate three feature sets for modeling: (1) structured EHR variables; (2) kidney-specific findings extracted from radiology reports; and (3) tumor characteristics derived from these findings. The detailed workflow for feature extraction from radiology reports is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>B.</p>
        <p>Structured clinical variables were retrieved from tables containing patient demographics, diagnoses, procedures, medications, vital signs, and laboratory results. The observation window extended from the earliest available record up to the index date. Demographic variables (eg, age, sex, and race/ethnicity) were categorized, diagnoses were mapped using PheWAS (Phenome-Wide Association Study) Phecode groupings [<xref ref-type="bibr" rid="ref36">36</xref>], and medications were aggregated at the ingredient level using Anatomical Therapeutic Chemical classification codes [<xref ref-type="bibr" rid="ref37">37</xref>]. For vital signs and laboratory tests, the most recent values preceding the index date were selected. For blood pressure, the highest and lowest systolic and diastolic values recorded on the most recent measurement day were used as continuous summary features. Dialysis procedures were encoded as binary indicators. Categorical features were one-hot encoded, and continuous variables were standardized prior to modeling.</p>
        <p>To isolate kidney-specific findings, the most recent radiology report within the observation window was selected. Given that abdominal CT reports often include findings for multiple organs, the LLM Qwen2.5-32B [<xref ref-type="bibr" rid="ref38">38</xref>] was then used to isolate the paragraph exclusively detailing kidney-specific findings, yielding a consolidated narrative for downstream analysis. The model was hosted on premises to ensure patient confidentiality and maintain data integrity.</p>
        <p>Abnormality characteristics were extracted from the kidney-specific narratives using an LLM to capture detailed lesion descriptors absent from structured data. Extracted features included abnormality presence, type (eg, cyst, mass, or tumor), lesion size, anatomical position (eg, left kidney or right kidney), exophytic status, CT attenuation, and contrast enhancement. For patients with multiple preindex renal CT examinations, tumor growth rate was calculated using only lesion size measurements obtained prior to the index date, defined as the earliest recorded renal tumor diagnosis. Specifically, when at least 2 preindex imaging studies were available, growth rate was computed as the slope of lesion size change (cm per day) between the earliest and latest available preindex imaging studies.</p>
        <p>Missing values in structured EHR variables were handled using modality-specific strategies. Before imputation, laboratory variables with greater than 50% missingness were excluded from the structured clinical feature set. Continuous features (ie, laboratory measurements and vital signs) were imputed using mean values calculated from the full cohort, and categorical variables (ie, diagnosis and medication records) were processed via standard presence-absence encoding, with missing entries encoded as 0 (eg, absence of a diagnosis or medication record). For radiology-derived abnormality characteristics, categorical attributes were treated as “unknown” when not explicitly mentioned and encoded as a separate category. Continuous abnormality variables (eg, lesion size and tumor growth rate) were encoded using a 2-part sentinel representation. For cases where size information was unavailable, or where fewer than 2 preindex longitudinal imaging studies were present to compute a growth rate, the numeric value was imputed with a placeholder of zero. This value was paired with a corresponding binary missingness indicator to ensure the model could mathematically distinguish between missing measurements and true physiologic stability (eg, a measured growth rate of zero). Both the numeric placeholder and the binary indicator were included as independent features in the model input. Kidney-specific text findings were encoded using pretrained biomedical transformer models, producing fixed-length embeddings of 768 dimensions. More details can be found in Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>LLM Extraction Evaluation</title>
        <p>To evaluate the performance of LLMs in extracting these abnormality characteristics, we manually annotated 500 independent radiology reports to establish a gold-standard dataset. The gold-standard annotations were created by 2 independent nonexpert annotators with biomedical research training, following detailed annotation guidelines developed in consultation with board-certified radiologists. Annotators independently labeled each of the 500 radiology reports, blinded to model outputs. Interrater reliability was assessed using Cohen κ for each abnormality attribute, with disagreements resolved through consensus discussion guided by a radiologist.</p>
        <p>For automated extraction, the pipeline identifies kidney-related sentences, extracts lesions with their attributes as structured entries, validates outputs against a predefined schema, and ranks lesions by severity, prioritizing tumors over masses and cysts, and ordering within categories by size. Four locally deployed LLMs (Qwen2.5-7B [<xref ref-type="bibr" rid="ref38">38</xref>], Qwen2.5-32B [<xref ref-type="bibr" rid="ref38">38</xref>], LLaMA3-8B [<xref ref-type="bibr" rid="ref39">39</xref>], and LLaMA3-70B [<xref ref-type="bibr" rid="ref39">39</xref>]) were evaluated in inference-only mode under institutional data privacy constraints, without any task-specific model training.</p>
        <p>To quantify extraction performance, 2 components of the pipeline were assessed. Kidney-specific paragraph retrieval was evaluated by comparing automatically retrieved paragraphs with manually annotated reference paragraphs using the Bilingual Evaluation Understudy-4 (BLEU-4) metric [<xref ref-type="bibr" rid="ref40">40</xref>], which measures n-gram overlap between predicted and gold-standard text. Performance was summarized using 2 statistics: the mean BLEU-4 score across reports and the percentage of reports achieving a BLEU-4 score greater than 0.5. A BLEU-4 score above 0.5 indicates good alignment between predicted and reference text in terms of vocabulary, phrase structure, and semantic content, and was considered acceptable for practical downstream use. Abnormality characteristic extraction was evaluated by computing per-attribute accuracy based on an exact match between model outputs and gold-standard annotations. All performance metrics were calculated on the same set of 500 annotated radiology reports.</p>
      </sec>
      <sec>
        <title>Modeling Approach and Fusion Strategies</title>
        <sec>
          <title>Overview</title>
          <p>To integrate the 3 feature modalities, we implemented and compared 3 fusion strategies: early, intermediate (middle), and late fusion, alongside unimodal baseline models (<xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Detailed architecture of the multimodal prediction model.</p>
            </caption>
            <graphic xlink:href="medinform_v14i1e84396_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Baseline Models</title>
          <p>We independently evaluated structured clinical variables and radiology-derived tumor attributes using a range of machine learning algorithms, including least absolute shrinkage and selection operator regression [<xref ref-type="bibr" rid="ref41">41</xref>], logistic regression [<xref ref-type="bibr" rid="ref42">42</xref>], MLP [<xref ref-type="bibr" rid="ref43">43</xref>], random forest (RF) [<xref ref-type="bibr" rid="ref44">44</xref>], support vector machine (SVM) [<xref ref-type="bibr" rid="ref45">45</xref>], and extreme gradient boosting [<xref ref-type="bibr" rid="ref46">46</xref>]. For narrative kidney-specific findings, 4 pretrained biomedical transformer models (radiology BERT [RadBERT] [<xref ref-type="bibr" rid="ref47">47</xref>], BERT pretrained on clinical text [ClinicalBERT] [<xref ref-type="bibr" rid="ref48">48</xref>], biomedical BERT [BioBERT] [<xref ref-type="bibr" rid="ref49">49</xref>], and BERT pretrained on PubMed text [PubMedBERT] [<xref ref-type="bibr" rid="ref50">50</xref>]) were used to generate text embeddings. For structured clinical variables and engineered lesion features, model performance was assessed under different resampling strategies (no resampling, random undersampling, random oversampling) to address class imbalance. For the text modality, the 4 transformer models were directly compared to identify the most effective baseline embedding approach.</p>
        </sec>
        <sec>
          <title>Early Fusion</title>
          <p>This approach integrates all 3 modalities at the input stage. Kidney-specific findings were first encoded into dense vectors using the best-performing pretrained transformer model selected from the 4 candidates described above. These embeddings were then concatenated with structured clinical variables and radiology-derived tumor attributes to form a combined feature vector, which was classified using a shallow 2-layer MLP. To mitigate overfitting in this high-dimensional space, we used dropout (0.1) after each hidden layer and Adam with decoupled weight decay (AdamW) optimization with weight decay (0.01; excluding bias and LayerNorm weights), providing L2 regularization of trainable parameters.</p>
        </sec>
        <sec>
          <title>Middle Fusion</title>
          <p>In this architecture, each modality was independently projected into a latent embedding space before being merged and passed through a 2-layer MLP classifier. To evaluate the effect of embedding choice, early and intermediate fusion experiments were repeated using each of the 4 BERT variants. Specifically, modality-specific inputs were compressed into lower-dimensional representations through dedicated projection layers with rectified linear unit activation and dropout (0.1) prior to fusion, thereby reducing dimensionality and mitigating overfitting risk. Training was performed using AdamW optimization with weight decay (0.01; excluding bias and LayerNorm weights) to ensure consistent L2 regularization.</p>
        </sec>
        <sec>
          <title>Late Fusion</title>
          <p>Predicted probabilities from the unimodal base models were combined using a logistic-regression meta-learner [<xref ref-type="bibr" rid="ref42">42</xref>]. To avoid bias from in-sample predictions, an out-of-fold (OOF) stacking strategy was applied. After splitting the dataset into an 80% training set and a 20% held-out test set, 5-fold cross-validation was conducted within the training set. Base models generated probabilities only for their validation folds, producing OOF predictions. For each fold, the meta-learner was trained using the concatenated OOF probabilities from the other folds and evaluated on the held-out test set using the corresponding fold-specific test probabilities. This procedure ensured that the meta-learner was trained only on predictions from samples not used to fit the base models.</p>
        </sec>
      </sec>
      <sec>
        <title>Model Validation and Performance Metrics</title>
        <p>Model performance was assessed using a stratified 80/20 train-test split. The 80% training partition was used for model development with 5-fold cross-validation. In each iteration, 1 fold was used for internal validation, and the remaining 4 folds were used for training. The resulting 5-fold–specific models were each evaluated on the same independent 20% held-out test set, which was not used for training, validation, resampling, or hyperparameter tuning. For the machine learning classifiers, hyperparameters were tuned via grid search, with model selection guided by the minimization of log-loss on the validation folds. In parallel, deep neural networks were trained using the AdamW optimizer and a linear learning rate scheduler with an initial warm-up phase, with cross-entropy loss serving as the selection criterion on validation subsets.</p>
        <p>The primary evaluation metric was the AUC. Secondary metrics included overall accuracy, balanced accuracy, sensitivity (recall for the malignant class), specificity, precision (positive predictive value), and <italic>F</italic><sub>1</sub>-score, all reported as the mean and SD across 5-fold–specific models evaluated on the independent held-out test set. Balanced accuracy was included to provide a more representative assessment of the model’s discriminative performance beyond class prevalence by equally weighting sensitivity and specificity in the presence of class imbalance. Decision thresholds for probability-based models were determined by maximizing the Youden J statistic. To address class imbalance, we compared 3 approaches: no resampling, random undersampling, and random oversampling. To prevent data leakage, all resampling procedures were performed strictly within the training partition of each cross-validation fold; validation folds and the held-out test set were preserved in their original class distributions.</p>
      </sec>
      <sec>
        <title>Interpretability Analyses</title>
        <p>We assessed the contribution of each modality to the best-performing multimodal fusion model using 2 complementary approaches.</p>
        <sec>
          <title>Kidney-Specific Findings (Text Modality)</title>
          <p>We assessed the contribution of individual tokens from the text modality by applying Captum’s layer-integrated gradients to the BERT embedding layer of our best-performing multimodal fusion model. First, we encapsulated the pretrained model in a wrapper that returns class-probability outputs via a SoftMax over logits. Using the wrapped model and specifying the BERT word-embedding layer as the attribution target, we computed layer-integrated gradients for each test sample with 50 interpolation steps. For each test sample, we summed the absolute attributions over the embedding dimension to yield a scalar score per token, then merged WordPiece subtokens into full words by concatenating “##” continuations and averaging their scores. We filtered out trivial tokens (length ≤2), retained the top 15 highest-scoring words per sample, and accumulated counts and total scores across the dataset. Dividing total attribution by occurrence count produced an average-importance score for each word. To improve robustness, this analysis was performed for the best model from each of the 5 cross-validation folds; keyword importance scores were then averaged across folds, and the final top-15 keywords were selected based on the aggregated importance ranking.</p>
        </sec>
        <sec>
          <title>Abnormality Characteristics and Clinical Variables Feature Analysis</title>
          <p>To disentangle and measure the respective contributions of abnormality characteristics and clinical variables to the fusion model’s outputs, we developed 2 dedicated input filters that retain only 1 modality at a time while nullifying all others and applied a gradient-based Shapley value explainer with a randomly selected reference set of up to 100 background samples. For each held-out sample, we computed local attributions for the positive class and derived the mean absolute contribution of each feature across the test cohort, thereby identifying the 15 most influential attributes. These results were summarized using a concise bar chart of mean absolute attributions and a beeswarm plot illustrating the polarity and variability of each feature’s contribution.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study was approved by the University of Florida Institutional Review Board (protocol number IRB202100401), which waived the requirement to obtain informed consent. The research does not involve greater than minimal risk for participation. Analyses involve only the secondary use of data that are either limited data sets or deidentified. Our research team has no direct contact with human subjects. All methods were carried out in accordance with relevant guidelines and regulations.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Cohort Characteristics</title>
        <p>A total of 967 patients with renal tumors met the inclusion criteria. The mean age at initial diagnosis, whether benign or malignant, was 69 (SD 12.93) years, with approximately 60% of the cohort being male. Among these patients, 712 (73.6%) were diagnosed with malignant tumors. Comorbidities were common: over 90% (874/967) had disorders of the kidney and ureters, and 74% (719/967) had a documented history of hypertension. Smoking was reported in 15.51% (150/967) of patients, and the mean BMI was 30.07 (SD 7.08). Detailed baseline characteristics and comorbidity profiles are summarized in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The median observation period was 34.43 (IQR 8.53-73.92) months. Surgical intervention was performed in 401 patients (41.47% of the cohort). Among these, 294 (30.40%) patients underwent partial nephrectomy and 250 (25.85%) patients underwent radical nephrectomy; 143 (14.79%) patients received both procedures. Tumor growth rate was available for 22.2% (215/967) of patients, reflecting those with at least 2 preindex surveillance CT examinations; the remaining cases were treated as missing for this feature.</p>
      </sec>
      <sec>
        <title>Sensitivity Analyses for Outcome Definition</title>
        <p>Surgical pathology reports containing renal tumor–related diagnostic statements were available for 89 patients (from 97 reports), of whom 81 (91.0%) were classified as malignant based on pathology text. In this pathology-confirmed subset, the primary <italic>ICD</italic>-based outcome labels were fully concordant with pathology-derived malignancy status. In addition, 401 patients underwent nephrectomy, and among the 383 with postprocedure renal tumor diagnosis codes, 347 (90.6%) were malignant; outcome labels derived from <italic>ICD</italic> codes were fully concordant with postnephrectomy diagnostic coding.</p>
      </sec>
      <sec>
        <title>LLM Extraction Evaluation</title>
        <p>Interrater reliability for the manually annotated gold-standard dataset was substantial to near-perfect across abnormality attributes (Cohen κ=0.95-1.00) (Table S10 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). <xref ref-type="table" rid="table1">Table 1</xref> summarizes the performance of LLMs in kidney-specific paragraph retrieval and abnormality characteristic extraction from radiology reports. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, Qwen2.5-32B and LLaMA3-70B demonstrated comparable performance in abnormality characteristic extraction. For kidney-specific paragraph retrieval, both models achieved high BLEU-4 scores, with at least 95% (475/500) of reports exceeding a BLEU-4 threshold of 0.5, indicating accurate identification of the relevant anatomical sections. However, accuracy alone does not adequately reflect model usability. Qwen2.5-7B and LLaMA3-8B failed to extract 10.4% (52/500) and 18.2% (91/500) of cases, respectively, despite repeated attempts. Additionally, the standard LLaMA3 release was limited by its 8192-token context window, which hindered few-shot prompting with multiple examples. In contrast, Qwen2.5-32B demonstrated more reliable kidney-specific paragraph retrieval across long, heterogeneous radiology reports and supported stable local deployment under institutional data-privacy constraints. Considering paragraph retrieval fidelity, abnormality extraction accuracy, and robustness, Qwen2.5-32B was selected as the primary model for downstream lesion feature extraction.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Performance comparison of large language models for kidney-specific paragraph retrieval and abnormality characteristic extraction from radiology reports.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="300"/>
            <col width="0"/>
            <col width="170"/>
            <col width="170"/>
            <col width="0"/>
            <col width="170"/>
            <col width="0"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td>Qwen2.5-7B</td>
                <td colspan="2">Qwen2.5-32B</td>
                <td colspan="2">LLaMA3-8B</td>
                <td>LLaMA3-70B</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="9">
                  <bold>Kidney-specific findings</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Average BLEU<sup>a</sup>-4 score</td>
                <td colspan="2">0.615</td>
                <td>0.913</td>
                <td colspan="2">0.878</td>
                <td colspan="2">0.929</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BLEU-4 ≥0.5, n (%)</td>
                <td colspan="2">332 (66.4)</td>
                <td>475 (95)</td>
                <td colspan="2">466 (93.2)</td>
                <td colspan="2">482 (96.4)</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Abnormality characteristics<sup>b</sup>, %</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Abnormality presence</td>
                <td colspan="2">76.3</td>
                <td>88.4</td>
                <td colspan="2">76.5</td>
                <td colspan="2">84.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Abnormality category</td>
                <td colspan="2">60.4</td>
                <td>83.5</td>
                <td colspan="2">60.6</td>
                <td colspan="2">81.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Position</td>
                <td colspan="2">52.0</td>
                <td>81.8</td>
                <td colspan="2">51.8</td>
                <td colspan="2">79.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Size (cm)</td>
                <td colspan="2">53.6</td>
                <td>89.1</td>
                <td colspan="2">76.2</td>
                <td colspan="2">78.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Exophytic</td>
                <td colspan="2">94.6</td>
                <td>100.0</td>
                <td colspan="2">94.4</td>
                <td colspan="2">100.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Attenuation</td>
                <td colspan="2">46.9</td>
                <td>85.6</td>
                <td colspan="2">47.2</td>
                <td colspan="2">86.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Enhancement</td>
                <td colspan="2">41.5</td>
                <td>89.4</td>
                <td colspan="2">41.3</td>
                <td colspan="2">88.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overall accuracy</td>
                <td colspan="2">60.8</td>
                <td>88.3</td>
                <td colspan="2">64.0</td>
                <td colspan="2">85.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Extraction success rate</td>
                <td colspan="2">89.6</td>
                <td>100</td>
                <td colspan="2">81.8</td>
                <td colspan="2">100</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>BLEU: Bilingual Evaluation Understudy.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Abnormality characteristics included abnormality presence, type (eg, cyst, mass, or tumor), lesion size, anatomical position (eg, left kidney or right kidney), exophytic status, computed tomography attenuation, and contrast enhancement.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Modeling Results</title>
        <p>The consolidated performance results for all modeling approaches are presented in <xref ref-type="table" rid="table2">Table 2</xref>. <xref ref-type="table" rid="table2">Table 2</xref> reports performance on the independent 20% held-out test set. Values are summarized as mean and SD across 5-fold–specific models, each trained during cross-validation within the 80% training partition and then evaluated on the same held-out test set. In the unimodal analyses, the SVM model applied to structured clinical variables achieved the highest descriptive AUC among the 6 traditional algorithms (0.758), with a balanced accuracy of 0.717, whereas the RF model achieved the highest <italic>F</italic><sub>1</sub>-score (0.833). For the engineered abnormality characteristics, logistic regression led the same set of models with an AUC of 0.716 and an <italic>F</italic><sub>1</sub>-score of 0.766. Ablation studies comparing undersampling, oversampling, and no-sampling strategies are provided in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the clinical variable models and Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the LLM-engineered abnormality characteristic models. Among the 4 medically pretrained BERT variants evaluated on kidney-specific report findings, RadBERT achieved the highest descriptive performance, yielding an AUC of 0.746 and an <italic>F</italic><sub>1</sub>-score of 0.719. In addition to AUC and <italic>F</italic><sub>1</sub>-score, balanced accuracy was reported for all models to account for class imbalance (<xref ref-type="table" rid="table2">Table 2</xref>). Among fusion models, early fusion achieved the highest balanced accuracy (mean 0.775, SD 0.010), followed by late fusion (mean 0.749, SD 0.013), indicating consistent performance across malignant and benign classes rather than dominance by the majority malignant class.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Performance comparison (mean and SD) of unimodal and fusion models for renal tumor prediction.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="130"/>
            <col width="130"/>
            <col width="140"/>
            <col width="110"/>
            <col width="120"/>
            <col width="120"/>
            <col width="110"/>
            <col width="110"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Model</td>
                <td>Overall accuracy</td>
                <td>Balanced accuracy</td>
                <td>Sensitivity</td>
                <td>Specificity</td>
                <td>Precision</td>
                <td><italic>F</italic><sub>1</sub>-score</td>
                <td>AUC<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="9">
                  <bold>Baseline model (clinical variables), mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LASSO<sup>b</sup> (OS<sup>c</sup>)</td>
                <td>0.729 (0.026)</td>
                <td>0.706 (0.003)</td>
                <td>0.754 (0.053)</td>
                <td>0.659 (0.051)</td>
                <td>0.862 (0.009)</td>
                <td>0.803 (0.026)</td>
                <td>0.750 (0.006)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR<sup>d</sup> (US<sup>e</sup>)</td>
                <td>0.674 (0.047)</td>
                <td>0.707 (0.034)</td>
                <td>0.638 (0.066)</td>
                <td>0.776 (0.041)</td>
                <td>0.889 (0.018)</td>
                <td>0.741 (0.049)</td>
                <td>0.728 (0.043)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>MLP<sup>f</sup> (NS<sup>g</sup>)</td>
                <td>0.618 (0.200)</td>
                <td>0.661 (0.090)</td>
                <td>0.569 (0.322)</td>
                <td>0.753 (0.145)</td>
                <td>0.693 (0.388)</td>
                <td>0.624 (0.350)</td>
                <td>0.670 (0.151)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RF<sup>h</sup> (US)</td>
                <td>0.761 (0.032)</td>
                <td>0.710 (0.013)</td>
                <td>0.817 (0.058)</td>
                <td>0.604 (0.047)</td>
                <td>0.853 (0.008)</td>
                <td>0.833 (0.029)</td>
                <td>0.754 (0.006)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVM<sup>i</sup> (US)</td>
                <td>0.712 (0.038)</td>
                <td>0.717 (0.016)</td>
                <td>0.708 (0.072)</td>
                <td>0.725 (0.071)</td>
                <td>0.880 (0.019)</td>
                <td>0.782 (0.037)</td>
                <td>0.758 (0.012)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost<sup>j</sup> (US)</td>
                <td>0.685 (0.063)</td>
                <td>0.707 (0.037)</td>
                <td>0.660 (0.110)</td>
                <td>0.753 (0.105)</td>
                <td>0.886 (0.038)</td>
                <td>0.751 (0.065)</td>
                <td>0.725 (0.044)</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Baseline model (abnormality characteristics), mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LASSO (OS)</td>
                <td>0.667 (0.048)</td>
                <td>0.680 (0.019)</td>
                <td>0.653 (0.115)</td>
                <td>0.706 (0.144)</td>
                <td>0.868 (0.036)</td>
                <td>0.739 (0.060)</td>
                <td>0.711 (0.018)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR (OS)</td>
                <td>0.691 (0.040)</td>
                <td>0.684 (0.010)</td>
                <td>0.698 (0.094)</td>
                <td>0.671 (0.114)</td>
                <td>0.860 (0.028)</td>
                <td>0.766 (0.048)</td>
                <td>0.716 (0.015)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>MLP (NS)</td>
                <td>0.682 (0.028)</td>
                <td>0.677 (0.030)</td>
                <td>0.688 (0.085)</td>
                <td>0.667 (0.139)</td>
                <td>0.857 (0.033)</td>
                <td>0.760 (0.037)</td>
                <td>0.697 (0.022)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RF (NS)</td>
                <td>0.642 (0.020)</td>
                <td>0.677 (0.020)</td>
                <td>0.604 (0.032)</td>
                <td>0.749 (0.047)</td>
                <td>0.871 (0.018)</td>
                <td>0.713 (0.021)</td>
                <td>0.670 (0.007)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVM (OS)</td>
                <td>0.692 (0.023)</td>
                <td>0.679 (0.007)</td>
                <td>0.706 (0.049)</td>
                <td>0.651 (0.053)</td>
                <td>0.851 (0.012)</td>
                <td>0.771 (0.026)</td>
                <td>0.688 (0.018)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost (NS)</td>
                <td>0.634 (0.065)</td>
                <td>0.655 (0.021)</td>
                <td>0.611 (0.134)</td>
                <td>0.698 (0.138)</td>
                <td>0.857 (0.036)</td>
                <td>0.704 (0.087)</td>
                <td>0.663 (0.039)</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Baseline model (kidney-specific findings), mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RadBERT<sup>k</sup></td>
                <td>0.665 (0.081)</td>
                <td>0.724 (0.045)</td>
                <td>0.600 (0.124)</td>
                <td>0.847 (0.051)</td>
                <td>0.917 (0.015)</td>
                <td>0.719 (0.100)</td>
                <td>0.746 (0.058)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ClinicalBERT<sup>l</sup></td>
                <td>0.668 (0.044)</td>
                <td>0.683 (0.039)</td>
                <td>0.652 (0.069)</td>
                <td>0.714 (0.086)</td>
                <td>0.866 (0.031)</td>
                <td>0.742 (0.045)</td>
                <td>0.723 (0.054)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BioBERT<sup>m</sup></td>
                <td>0.659 (0.073)</td>
                <td>0.634 (0.053)</td>
                <td>0.687 (0.121)</td>
                <td>0.580 (0.124)</td>
                <td>0.822 (0.033)</td>
                <td>0.743 (0.079)</td>
                <td>0.626 (0.068)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>PubMedBERT<sup>n</sup></td>
                <td>0.602 (0.120)</td>
                <td>0.572 (0.042)</td>
                <td>0.635 (0.291)</td>
                <td>0.510 (0.364)</td>
                <td>0.813 (0.075)</td>
                <td>0.671 (0.165)</td>
                <td>0.547 (0.076)</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Fusion model (all 3 modalities), mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Early fusion</td>
                <td>0.750 (0.029)</td>
                <td>0.775 (0.010)</td>
                <td>0.722 (0.062)</td>
                <td>0.828 (0.069)</td>
                <td>0.924 (0.021)</td>
                <td>0.809 (0.030)</td>
                <td>0.813 (0.008)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Middle fusion</td>
                <td>0.726 (0.028)</td>
                <td>0.744 (0.018)</td>
                <td>0.705 (0.040)</td>
                <td>0.784 (0.016)</td>
                <td>0.902 (0.006)</td>
                <td>0.791 (0.026)</td>
                <td>0.782 (0.011)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Late fusion</td>
                <td>0.736 (0.021)</td>
                <td>0.749 (0.013)</td>
                <td>0.722 (0.053)</td>
                <td>0.776 (0.072)</td>
                <td>0.902 (0.022)</td>
                <td>0.800 (0.025)</td>
                <td>0.805 (0.016)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>LASSO: least absolute shrinkage and selection operator.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>OS: oversampling.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>US: undersampling.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>MLP: multilayer perceptron.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>NS: no sampling.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>RF: random forest.</p>
            </fn>
            <fn id="table2fn9">
              <p><sup>i</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table2fn10">
              <p><sup>j</sup>XGBoost: extreme gradient boosting.</p>
            </fn>
            <fn id="table2fn11">
              <p><sup>k</sup>RadBERT: radiology BERT.</p>
            </fn>
            <fn id="table2fn12">
              <p><sup>l</sup>ClinicalBERT: BERT pretrained on clinical text.</p>
            </fn>
            <fn id="table2fn13">
              <p><sup>m</sup>BioBERT: biomedical BERT.</p>
            </fn>
            <fn id="table2fn14">
              <p><sup>n</sup>PubMedBERT: BERT pretrained on PubMed text.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Variability across cross-validation folds was observed in some configurations using ClinicalBERT and BioBERT embeddings. Because pretrained transformer embeddings are deterministic for fixed inputs, this variability reflects sensitivity of the downstream fusion classifier to specific training-validation splits in a high-dimensional setting rather than the instability of the embedding models themselves. For example, ClinicalBERT-based early fusion exhibited higher fold-level variance (overall accuracy: mean 0.661, SD 0.226; Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), and BioBERT exhibited variability in middle fusion performance (<italic>F</italic><sub>1</sub>-score: mean 0.608, SD 0.344; Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), indicating inconsistent fold-level behavior. In contrast, RadBERT-based models demonstrated notably more stable performance across both early and middle fusion strategies.</p>
        <p>Multimodal integration was associated with higher descriptive predictive discrimination than unimodal modeling. The middle fusion framework achieved an AUC of 0.782, which was higher than the best unimodal baseline AUC, while the early fusion configuration reached the highest AUC of 0.813 and the highest <italic>F</italic><sub>1</sub>-score of 0.809. The late fusion strategy, which combined predicted probabilities from top-performing unimodal models through a meta-learner, yielded a competitive AUC of 0.805, with precision and sensitivity values of 0.902 and 0.722, respectively. These results suggest that integrating structured and unstructured data modalities may provide complementary information for renal tumor malignancy prediction.</p>
      </sec>
      <sec>
        <title>Ablation Studies</title>
        <p>To quantify the contribution of each data modality, we performed an ablation analysis on each fusion architecture by systematically omitting one modality and measuring the resulting impact on model performance. For the 3 modalities, all pairwise combinations were evaluated and their metrics compared against those obtained using the full 3-modality setup. Detailed results are presented in <xref ref-type="table" rid="table3">Table 3</xref>. Mean (SD) of each evaluation metric across 5-fold cross-validation is reported. Balanced accuracy was also reported in ablation analyses (<xref ref-type="table" rid="table3">Table 3</xref>) to ensure that observed performance differences across modality combinations were not driven by class prevalence effects. Input modalities include kidney-specific findings, abnormality characteristics, and clinical variables. Because RadBERT achieved the best overall performance, all ablation results reported in <xref ref-type="table" rid="table3">Table 3</xref> are based on models incorporating RadBERT (ie, late fusion combining RadBERT with RF). Ablation analyses for the other 3 BERT variants are provided in Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> (ie, early fusion missing-modality) and Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> (ie, middle fusion missing-modality).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Ablation study results: effect of excluding individual modalities on fusion model performance.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="0"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="130"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="0"/>
            <thead>
              <tr valign="bottom">
                <td colspan="5">Modality</td>
                <td colspan="2">Overall accuracy</td>
                <td colspan="2">Balanced accuracy</td>
                <td colspan="2">Sensitivity</td>
                <td colspan="2">Specificity</td>
                <td colspan="2">Precision</td>
                <td colspan="2"><italic>F</italic><sub>1</sub>-score</td>
                <td colspan="2">AUC<sup>a</sup></td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="20">
                  <bold>Early fusion, mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">AC<sup>b</sup> + CV<sup>c</sup></td>
                <td colspan="2">0.763 (0.052)</td>
                <td colspan="2">0.743 (0.010)</td>
                <td colspan="2">0.785 (0.108)</td>
                <td colspan="2">0.701 (0.108)</td>
                <td colspan="2">0.884 (0.027)</td>
                <td colspan="2">0.827 (0.051)</td>
                <td colspan="2">0.779 (0.010)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">KF<sup>d</sup> + CV</td>
                <td colspan="2">0.749 (0.046)</td>
                <td colspan="2">0.759 (0.030)</td>
                <td colspan="2">0.738 (0.087)</td>
                <td colspan="2">0.780 (0.097)</td>
                <td colspan="2">0.907 (0.032)</td>
                <td colspan="2">0.811 (0.049)</td>
                <td colspan="2">0.796 (0.030)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">KF + AC</td>
                <td colspan="2">0.687 (0.041)</td>
                <td colspan="2">0.727 (0.037)</td>
                <td colspan="2">0.642 (0.048)</td>
                <td colspan="2">0.812 (0.041)</td>
                <td colspan="2">0.905 (0.023)</td>
                <td colspan="2">0.751 (0.038)</td>
                <td colspan="2">0.755 (0.061)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">KF + AC + CV</td>
                <td colspan="2">0.750 (0.029)</td>
                <td colspan="2">0.775 (0.010)</td>
                <td colspan="2">0.722 (0.062)</td>
                <td colspan="2">0.828 (0.069)</td>
                <td colspan="2">0.924 (0.021)</td>
                <td colspan="2">0.809 (0.030)</td>
                <td colspan="2">0.813 (0.008)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="20">
                  <bold>Middle fusion, mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">AC + CV</td>
                <td colspan="2">0.715 (0.060)</td>
                <td colspan="2">0.698 (0.030)</td>
                <td colspan="2">0.734 (0.118)</td>
                <td colspan="2">0.662 (0.124)</td>
                <td colspan="2">0.862 (0.027)</td>
                <td colspan="2">0.788 (0.063)</td>
                <td colspan="2">0.731 (0.019)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">KF + CV</td>
                <td colspan="2">0.699 (0.069)</td>
                <td colspan="2">0.730 (0.059)</td>
                <td colspan="2">0.664 (0.083)</td>
                <td colspan="2">0.796 (0.045)</td>
                <td colspan="2">0.899 (0.032)</td>
                <td colspan="2">0.763 (0.066)</td>
                <td colspan="2">0.749 (0.074)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">KF + AC</td>
                <td colspan="2">0.670 (0.056)</td>
                <td colspan="2">0.711 (0.043)</td>
                <td colspan="2">0.625 (0.081)</td>
                <td colspan="2">0.796 (0.069)</td>
                <td colspan="2">0.897 (0.033)</td>
                <td colspan="2">0.734 (0.058)</td>
                <td colspan="2">0.747 (0.052)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">KF + AC + CV</td>
                <td colspan="2">0.726 (0.028)</td>
                <td colspan="2">0.744 (0.018)</td>
                <td colspan="2">0.705 (0.040)</td>
                <td colspan="2">0.784 (0.016)</td>
                <td colspan="2">0.902 (0.006)</td>
                <td colspan="2">0.791 (0.026)</td>
                <td colspan="2">0.782 (0.011)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="20">
                  <bold>Late fusion, mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td>AC + CV</td>
                <td colspan="2">0.677 (0.038)</td>
                <td colspan="2">0.704 (0.025)</td>
                <td colspan="2">0.648 (0.091)</td>
                <td colspan="2">0.761 (0.125)</td>
                <td colspan="2">0.888 (0.035)</td>
                <td colspan="2">0.745 (0.046)</td>
                <td colspan="2">0.752 (0.021)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td>KF + CV</td>
                <td colspan="2">0.764 (0.025)</td>
                <td colspan="2">0.752 (0.032)</td>
                <td colspan="2">0.778 (0.067)</td>
                <td colspan="2">0.725 (0.119)</td>
                <td colspan="2">0.892 (0.036)</td>
                <td colspan="2">0.828 (0.027)</td>
                <td colspan="2">0.794 (0.026)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td>KF + AC</td>
                <td colspan="2">0.692 (0.040)</td>
                <td colspan="2">0.722 (0.026)</td>
                <td colspan="2">0.659 (0.055)</td>
                <td colspan="2">0.784 (0.014)</td>
                <td colspan="2">0.895 (0.009)</td>
                <td colspan="2">0.758 (0.040)</td>
                <td colspan="2">0.758 (0.037)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td>KF + AC + CV</td>
                <td colspan="2">0.736 (0.021)</td>
                <td colspan="2">0.749 (0.013)</td>
                <td colspan="2">0.722 (0.053)</td>
                <td colspan="2">0.776 (0.072)</td>
                <td colspan="2">0.902 (0.022)</td>
                <td colspan="2">0.800 (0.025)</td>
                <td colspan="2">0.805 (0.016)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>AC: abnormality characteristics.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>CV: clinical variables.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>KF: kidney-specific findings.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In late fusion experiments, all 24 combinations of 6 traditional machine learning classifiers and 4 pretrained BERT models were evaluated. Among these, RadBERT paired with RF emerged as the top-performing configuration (see Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>Omitting kidney-specific findings led to the greatest performance decline in both middle and late fusion configurations. In contrast, within the early fusion scheme, excluding clinical variables caused the most pronounced drop. Removal of abnormality characteristics had only a modest effect across all 3 fusion strategies, with early and late fusion approaches exhibiting robustness to their absence.</p>
      </sec>
      <sec>
        <title>Interpretability Analyses on Fusion Model</title>
        <sec>
          <title>Kidney-Specific Findings: Visualization and Interpretation</title>
          <p>The interpretability analysis of the text modality revealed distinctive linguistic patterns associated with malignancy predictions. The average contribution scores, computed using layer-integrated gradients, were visualized as a ranked horizontal bar plot (Figure S11 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Each bar represents a token, and bar length corresponds to the token’s mean attribution score toward the model’s positive-class predictions (larger values indicate stronger positive contribution). Representative high-impact terms from the kidney-specific findings notes are listed in <xref ref-type="table" rid="table4">Table 4</xref>. Tokens such as “greater,” “irregularly,” and “huge” exhibited the highest attribution scores, indicating a strong association with malignancy classification in the multimodal fusion framework.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Representative words from the kidney-specific findings notes.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="150"/>
              <col width="180"/>
              <col width="670"/>
              <thead>
                <tr valign="top">
                  <td>Ground truth</td>
                  <td>Prediction</td>
                  <td>Kidney-specific findings</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Malignant</td>
                  <td>Malignant</td>
                  <td>There is a 2.3 cm exophytic lesion on the inferior pole of the <italic>right kidney</italic>, with an attenuation <italic>greater</italic> than expected for simple cyst.</td>
                </tr>
                <tr valign="top">
                  <td>Malignant</td>
                  <td>Malignant</td>
                  <td>Within the right kidney posteriorly, there is an <italic>irregularly</italic> enhancing mass measuring approximately 2.7 x 2.7 cm.</td>
                </tr>
                <tr valign="top">
                  <td>Malignant</td>
                  <td>Malignant</td>
                  <td><italic>Huge</italic> left renal mass is again partially <italic>observed</italic>.</td>
                </tr>
                <tr valign="top">
                  <td>Benign</td>
                  <td>Benign</td>
                  <td>There are multiple <italic>unchanged</italic> bilateral renal <italic>simple</italic> cysts.</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Abnormality Characteristics and Clinical Variables Feature Analysis</title>
          <p>For the structured clinical variables, feature importance rankings derived from gradient-based Shapley values (<xref rid="figure3" ref-type="fig">Figure 3</xref>A) indicated that diastolic and systolic blood pressures, BMI, and serum sodium were the most influential predictors. Among the abnormality characteristics, lesion size demonstrated the highest importance (<xref rid="figure3" ref-type="fig">Figure 3</xref>B), followed by the presence of a mass, with enhancement patterns furnishing critical discriminative information for differentiating benign from malignant lesions.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Shapley additive explanations (SHAP) summary plot showing (A) the top 15 clinical variables and (B) the top 15 abnormality characteristics contributing to model prediction. DBP: diastolic blood pressure; HDL: high-density lipoprotein; SBP: systolic blood pressure.</p>
            </caption>
            <graphic xlink:href="medinform_v14i1e84396_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>Among the unimodal approaches, structured clinical variables showed the highest descriptive predictive performance. Specifically, an SVM classifier with undersampling achieved an AUC of 0.758 (<xref ref-type="table" rid="table2">Table 2</xref>; Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). By comparison, logistic regression on abnormality characteristics, enhanced via oversampling, yielded an AUC of 0.716 (Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), likely limited by extraction fidelity and feature granularity. Text embeddings from complete paragraph-level kidney-specific findings slightly outperformed the abnormality descriptors, achieving an AUC of 0.746 (<xref ref-type="table" rid="table2">Table 2</xref>) when encoded with RadBERT [<xref ref-type="bibr" rid="ref47">47</xref>] (initialized from BioBERT [<xref ref-type="bibr" rid="ref49">49</xref>] and fine-tuned on radiology reports). PubMedBERT [<xref ref-type="bibr" rid="ref50">50</xref>], pretrained from scratch on PubMed abstracts and full-text articles, showed comparatively lower performance, likely due to its limited alignment with radiology-report semantics. All 3 multimodal fusion strategies demonstrated higher descriptive AUC values than the unimodal models (<xref ref-type="table" rid="table2">Table 2</xref>). Early fusion, concatenating feature representations from each modality prior to classification, produced the highest AUC (0.813) and <italic>F</italic><sub>1</sub>-score (0.809), potentially because it preserves complementary information across modalities. Middle fusion, which first projects each modality into a lower-dimensional latent space, performed slightly worse (AUC 0.782; <italic>F</italic><sub>1</sub>-score=0.791), possibly due to information loss during dimensionality reduction. 
Late fusion remained competitive, achieving an AUC of 0.805 and an <italic>F</italic><sub>1</sub>-score of 0.800, but did not surpass early fusion on any major metric. Overall, these findings indicate that early integration of multimodal information achieved the highest descriptive performance in this study, while late fusion still offered stable and competitive performance through an ensemble-based combination of unimodal predictors.</p>
        <p>Although AUC values in the range of 0.8 are insufficient to support automated clinical decision-making, they may offer incremental value for preoperative risk stratification. For patients with indeterminate renal masses, a multimodal model could complement radiologic interpretation by providing an additional quantitative estimate of malignancy risk to inform multidisciplinary discussion and clinical judgment. Such estimates are not intended to direct specific interventions but rather to contextualize imaging findings and patient characteristics within existing diagnostic workflows. Higher specificity, if confirmed in future validation studies, could potentially help reduce unnecessary interventions for benign lesions, an ongoing challenge in renal mass management. Overall, incremental gains in predictive performance may translate into practical support for clinical risk assessment when used alongside established evaluation processes.</p>
      </sec>
      <sec>
        <title>Modality Contributions From Ablation Studies</title>
        <p>To assess the contribution of each modality within the fusion architectures, we performed systematic ablation experiments. In the early fusion model, omitting structured clinical variables was associated with the largest decrease in AUC, from 0.813 to 0.755, indicating that clinical variables contributed the strongest predictive signal in this configuration. Removing kidney-specific findings led to a smaller decline in AUC to 0.779, whereas excluding radiologic abnormality characteristics had only a modest effect, with the AUC remaining at 0.796. In the middle fusion model, the pattern differed slightly: omitting kidney-specific findings caused the greatest reduction in AUC, from 0.782 to 0.731, while excluding abnormality characteristics or clinical variables resulted in smaller decreases, to 0.749 and 0.747, respectively. In the late fusion model, removing kidney-specific findings caused the largest reduction in AUC, from 0.805 to 0.752, closely followed by omission of clinical variables, which reduced the AUC to 0.758; excluding abnormality characteristics resulted in only a modest decline, with the AUC remaining at 0.794. Overall, these ablation results suggest that structured clinical variables were the dominant source of predictive information in the early fusion scheme, whereas kidney-specific text findings contributed most strongly in the middle and late fusion models. Across all 3 fusion strategies, removal of abnormality characteristics produced the smallest or among the smallest performance changes, suggesting that part of the radiologic information captured by these handcrafted features may already be represented in the kidney-specific text embeddings.</p>
      </sec>
      <sec>
        <title>Interpretability Insights</title>
        <p>Furthermore, we conducted an interpretability analysis on the optimal early fusion model by applying Captum’s layer-integrated gradients to the kidney-specific text and Shapley additive explanations (SHAP) values to the abnormality characteristics and clinical variables. As illustrated in the horizontal bar plot visualization (Figure S11 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) and exemplified in <xref ref-type="table" rid="table4">Table 4</xref>, tokens such as “huge,” “irregularly,” and “greater” attained relatively high integrated gradients attributions, suggesting a stronger contribution to malignancy predictions. This observation is in line with prior imaging research reporting that irregular tumor margins are associated with higher-grade clear cell RCC [<xref ref-type="bibr" rid="ref51">51</xref>].</p>
        <p>Quantitative SHAP analysis of clinical variables (<xref rid="figure3" ref-type="fig">Figure 3</xref>A) showed that higher systolic blood pressure, diastolic blood pressure, and BMI were associated with increased predicted probabilities of malignancy. These associations are consistent with previous epidemiological studies: individuals with diastolic pressure ≥100 mm Hg face more than a twofold higher risk of RCC compared to those with &lt;80 mm Hg [<xref ref-type="bibr" rid="ref52">52</xref>], and a dose-response meta-analysis demonstrates that each 1 kg/m<sup>2</sup> increase in BMI is linked to an approximately 6% higher incidence of KC [<xref ref-type="bibr" rid="ref53">53</xref>]. For abnormality characteristics (<xref rid="figure3" ref-type="fig">Figure 3</xref>B), SHAP analysis identified tumor size as the most influential imaging feature, with contrast enhancement also contributing meaningfully to the model’s predictions. This finding aligns with prior evidence showing that each 1 cm increase in tumor diameter is associated with an approximately 30% higher likelihood of malignancy (effect size=1.3, 95% CI 1.22-1.43 per cm) [<xref ref-type="bibr" rid="ref54">54</xref>]. Furthermore, established imaging guidelines indicate that postcontrast attenuation gains greater than 15 Hounsfield units may aid in distinguishing malignant renal lesions [<xref ref-type="bibr" rid="ref55">55</xref>]. Collectively, these interpretability results suggest that our multimodal fusion framework captures and integrates clinically validated markers across multiple modalities.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Despite these findings, several limitations should be acknowledged. First, this was a single-institution study, and no external validation was performed. Consequently, model performance may reflect institution-specific characteristics, including documentation practices, patient populations, and radiology reporting styles unique to our health care system. This is particularly relevant for NLP-based feature extraction, which can be sensitive to local language conventions. Second, the retrospective use of EHR data introduces inherent biases related to incomplete or inaccurately recorded information. Mean imputation was applied strictly to continuous variables, specifically laboratory measurements and vital signs. To reduce instability from extreme sparsity, laboratory variables with greater than 50% missingness were excluded before imputation. However, some retained variables, including cholesterol and high-density lipoprotein cholesterol, had missingness rates close to this threshold and remained subject to mean imputation. The missingness percentages for these variables are reported in Table S8 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> to provide transparency regarding the extent of missing data. We acknowledge that this simplified strategy may attenuate physiological variance and fail to preserve correlations between related clinical measures, such as metabolic markers, potentially affecting model calibration, particularly for non–tree-based models used in the fusion strategies. In addition, continuous structured variables were imputed and standardized using cohort-level parameters rather than parameters estimated separately within each cross-validation training fold, which may introduce limited information leakage. Third, ground-truth tumor labels were not uniformly confirmed by surgical pathology. 
For the unoperated majority of the cohort, outcome labels were derived from longitudinal clinical diagnosis codes that may reflect radiologic interpretation rather than definitive tumor biology. This introduces a risk of diagnostic circularity, where the imaging features used for prediction overlap with those informing clinical labeling. While we mitigated this by requiring repeated concordant codes on distinct dates and performing sensitivity analyses on the pathology-confirmed subset, residual misclassification and circularity may persist. To further assess potential temporal leakage from treatment-related terminology, we conducted a temporal audit of all radiology reports used for feature extraction. Among 1338 reports, only 17 (1.27%) contained the term “surgically,” and none of these patients had nephrectomy or other renal procedural codes recorded prior to the index date, suggesting minimal risk of leakage from completed interventions. However, because explicit diagnostic terminology was not systematically removed from the radiology text, some NLP-derived features may partially reflect radiologists’ diagnostic impressions rather than purely morphological descriptors, introducing a potential source of label leakage in the text-derived features. Although the updated interpretability analysis primarily highlighted descriptive terms such as “greater,” “irregularly,” and “huge,” the broader possibility of diagnostic-language leakage remains a limitation of the current approach. Finally, the LLM-based extraction pipeline incurs nontrivial computational requirements. While models were used in inference-only mode, larger architectures like Qwen2.5-32B require high-memory GPU infrastructure for local deployment. 
Because processing a single radiology report requires several seconds in batch mode, the proposed framework is currently best suited for asynchronous deployment within institutional analytics pipelines rather than real-time clinical use in resource-constrained environments.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>Future work should focus on validating this multimodal approach across multiple health care institutions using larger and more diverse patient populations to ensure broad applicability. Although internal validation demonstrated consistent performance, multicenter external validation across heterogeneous health care systems is required to assess generalizability prior to clinical implementation. Prospective studies are also needed to evaluate clinical use and real-world effectiveness in live diagnostic workflows. Incorporating more robust missing data handling strategies, such as iterative imputation (eg, Multivariate Imputation by Chained Equations [<xref ref-type="bibr" rid="ref56">56</xref>]), could more dynamically capture physiological dependencies among continuous clinical variables. Furthermore, integrating longitudinal patient data may allow the model to provide dynamic risk assessments as new clinical information becomes available over time. Additional research could explore deeper integration with imaging modalities, including advanced radiomic features or the direct use of raw imaging data, potentially enhancing diagnostic accuracy. Finally, improving scalability and computational efficiency through model compression techniques or cloud-based solutions could facilitate wider adoption in resource-constrained clinical settings.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study developed and evaluated a multimodal malignancy prediction pipeline that integrates structured EHR variables with radiology report–derived features, including LLM-extracted abnormality characteristics and transformer-based embeddings of kidney-specific findings. Across systematic comparisons of early, middle, and late fusion, multimodal models achieved higher descriptive performance than unimodal baselines, with early fusion yielding the highest AUC and <italic>F</italic><sub>1</sub>-score. Ablation and interpretability analyses indicated that structured clinical variables and kidney-specific report embeddings appeared to provide complementary predictive value: clinical variables provided the strongest signal in the early fusion setting, whereas kidney-specific text embeddings contributed most strongly in the middle and late fusion models. Together, these findings suggest that multimodal EHR modeling is a promising and scalable approach for supporting preoperative renal mass risk stratification, although external validation and formal statistical comparisons are needed before claims of superiority or clinical effectiveness can be made.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary tables and figures.</p>
        <media xlink:href="medinform_v14i1e84396_app1.docx" xlink:title="DOCX File, 447 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AdamW</term>
          <def>
            <p>Adam optimizer with decoupled weight decay</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BioBERT</term>
          <def>
            <p>biomedical BERT</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">BLEU-4</term>
          <def>
            <p>Bilingual Evaluation Understudy-4</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ClinicalBERT</term>
          <def>
            <p>BERT pretrained on clinical text</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">CT</term>
          <def>
            <p>computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">KC</term>
          <def>
            <p>kidney cancer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">MLP</term>
          <def>
            <p>multilayer perceptron</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">OOF</term>
          <def>
            <p>out-of-fold</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">PheWAS</term>
          <def>
            <p>phenome-wide association study</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">PubMedBERT</term>
          <def>
            <p>BERT pretrained on PubMed text</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">RadBERT</term>
          <def>
            <p>radiology BERT</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">RCC</term>
          <def>
            <p>renal cell carcinoma</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">SHAP</term>
          <def>
            <p>Shapley additive explanations</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors wish to acknowledge the developers of the large language models evaluated in this study, whose groundbreaking work made this research possible. While drafting this manuscript, the authors used ChatGPT (OpenAI) for the sole purpose of checking the grammar of the text.</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>The datasets generated and/or analyzed during this study are not publicly available due to patient confidentiality constraints and institutional review board restrictions, but are available from the corresponding author on reasonable request and with approval from the institutional review committee.</p>
    </notes>
    <notes>
      <title>Funding</title>
      <p>No external financial support or grants were received from any public, commercial, or not-for-profit entities for the research, authorship, or publication of this article.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>Conceptualization: ZF</p>
        <p>Data curation: ZF, RL, CS, JP</p>
        <p>Formal analysis: ZF</p>
        <p>Methodology: ZF, RL</p>
        <p>Visualization: RL</p>
        <p>Supervision: RT, JX</p>
        <p>Writing – original draft: ZF</p>
        <p>Writing – review &#38; editing: CS, JP, RT, JX</p>
        <p>Correspondence: JX</p>
        <p>All authors critically reviewed the manuscript for scientific content and approved the final version for publication.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>SEER cancer stat facts: Kidney and renal pelvis cancer</article-title>
          <source>National Cancer Institute</source>
          <access-date>2025-07-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://seer.cancer.gov/statfacts/html/kidrp.html">https://seer.cancer.gov/statfacts/html/kidrp.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Fast facts about kidney cancer</article-title>
          <source>Kidney Cancer Association</source>
          <access-date>2025-07-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.kidneycancer.org/essentials/fast-facts/">https://www.kidneycancer.org/essentials/fast-facts/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <article-title>Cancer statistics center</article-title>
          <source>American Cancer Society</source>
          <access-date>2025-07-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancerstatisticscenter.cancer.org/">https://cancerstatisticscenter.cancer.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>WY</given-names>
            </name>
          </person-group>
          <article-title>Renal cell carcinoma: a review</article-title>
          <source>JAMA</source>
          <year>2024</year>
          <volume>332</volume>
          <issue>12</issue>
          <fpage>1001</fpage>
          <lpage>1010</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2024.12848</pub-id>
          <pub-id pub-id-type="medline">39196544</pub-id>
          <pub-id pub-id-type="pii">2822917</pub-id>
          <pub-id pub-id-type="pmcid">PMC11790279</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheaib</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>HD</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Gorin</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Haut</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Canner</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Allaf</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Pierorazio</surname>
              <given-names>PM</given-names>
            </name>
          </person-group>
          <article-title>Stage-specific conditional survival in renal cell carcinoma after nephrectomy</article-title>
          <source>Urol Oncol</source>
          <year>2020</year>
          <volume>38</volume>
          <issue>1</issue>
          <fpage>6.e1</fpage>
          <lpage>6.e7</lpage>
          <pub-id pub-id-type="doi">10.1016/j.urolonc.2019.08.011</pub-id>
          <pub-id pub-id-type="medline">31522864</pub-id>
          <pub-id pub-id-type="pii">S1078-1439(19)30329-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Semerjian</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlovich</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Gorin</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Allaf</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Pierorazio</surname>
              <given-names>PM</given-names>
            </name>
          </person-group>
          <article-title>Surgical removal of renal tumors with low metastatic potential based on clinical radiographic size: a systematic review of the literature</article-title>
          <source>Urol Oncol</source>
          <year>2019</year>
          <volume>37</volume>
          <issue>8</issue>
          <fpage>519</fpage>
          <lpage>524</lpage>
          <pub-id pub-id-type="doi">10.1016/j.urolonc.2019.05.013</pub-id>
          <pub-id pub-id-type="medline">31202730</pub-id>
          <pub-id pub-id-type="pii">S1078-1439(19)30199-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Deep learning assessment of small renal masses at contrast-enhanced multiphase CT</article-title>
          <source>Radiology</source>
          <year>2024</year>
          <volume>311</volume>
          <issue>2</issue>
          <fpage>e232178</fpage>
          <pub-id pub-id-type="doi">10.1148/radiol.232178</pub-id>
          <pub-id pub-id-type="medline">38742970</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence links CT images to pathologic features and survival outcomes of renal masses</article-title>
          <source>Nat Commun</source>
          <year>2025</year>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>1425</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-025-56784-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-025-56784-z</pub-id>
          <pub-id pub-id-type="medline">39915478</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-025-56784-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC11802731</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klontzas</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Kalarakis</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Koltsakis</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Papathomas</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Karantanas</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Tzortzakakis</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Convolutional neural networks for the differentiation between benign and malignant renal tumors with a multicenter international computed tomography dataset</article-title>
          <source>Insights Imaging</source>
          <year>2024</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>26</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38270726"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13244-023-01601-8</pub-id>
          <pub-id pub-id-type="medline">38270726</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13244-023-01601-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10811309</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A deep learning-based radiomics model for differentiating benign and malignant renal tumors</article-title>
          <source>Transl Oncol</source>
          <year>2019</year>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>292</fpage>
          <lpage>300</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1936-5233(18)30341-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.tranon.2018.10.012</pub-id>
          <pub-id pub-id-type="medline">30448734</pub-id>
          <pub-id pub-id-type="pii">S1936-5233(18)30341-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6299150</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Nan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A robust deep learning method with uncertainty estimation for the pathological classification of renal cell carcinoma based on CT images</article-title>
          <source>J Imaging Inform Med</source>
          <year>2025</year>
          <volume>38</volume>
          <issue>3</issue>
          <fpage>1323</fpage>
          <lpage>1333</lpage>
          <pub-id pub-id-type="doi">10.1007/s10278-024-01276-7</pub-id>
          <pub-id pub-id-type="medline">39313716</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10278-024-01276-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC12092889</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nie</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The interpretable CT-based vision transformer model for preoperative prediction of clear cell renal cell carcinoma SSIGN score and outcome</article-title>
          <source>Insights Imaging</source>
          <year>2025</year>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>98</fpage>
          <pub-id pub-id-type="doi">10.1186/s13244-025-01972-0</pub-id>
          <pub-id pub-id-type="medline">40346303</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13244-025-01972-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC12064486</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Deep learning-based multi-model prediction for disease-free survival status of patients with clear cell renal cell carcinoma after surgery: a multicenter cohort study</article-title>
          <source>Int J Surg</source>
          <year>2024</year>
          <volume>110</volume>
          <issue>5</issue>
          <fpage>2970</fpage>
          <lpage>2977</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38445478"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/JS9.0000000000001222</pub-id>
          <pub-id pub-id-type="medline">38445478</pub-id>
          <pub-id pub-id-type="pii">01279778-202405000-00052</pub-id>
          <pub-id pub-id-type="pmcid">PMC11093464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Bayati</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pruthi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kaushik</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liss</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Systematic review of modifiable risk factors for kidney cancer</article-title>
          <source>Urol Oncol</source>
          <year>2019</year>
          <volume>37</volume>
          <issue>6</issue>
          <fpage>359</fpage>
          <lpage>371</lpage>
          <pub-id pub-id-type="doi">10.1016/j.urolonc.2018.12.008</pub-id>
          <pub-id pub-id-type="medline">30685335</pub-id>
          <pub-id pub-id-type="pii">S1078-1439(18)30502-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ehrenstein</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kharrazi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>Obtaining Data from Electronic Health Records</source>
          <year>2019</year>
          <publisher-loc>Rockville, MD</publisher-loc>
          <publisher-name>Agency for Healthcare Research and Quality</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Pareek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Seyyedi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Banerjee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lungren</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Fusion of medical imaging and electronic health records using deep learning: a systematic review and implementation guidelines</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <volume>3</volume>
          <fpage>136</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-00341-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-00341-z</pub-id>
          <pub-id pub-id-type="medline">33083571</pub-id>
          <pub-id pub-id-type="pii">341</pub-id>
          <pub-id pub-id-type="pmcid">PMC7567861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Managing unstructured big data in healthcare system</article-title>
          <source>Healthc Inform Res</source>
          <year>2019</year>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>2</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30788175"/>
          </comment>
          <pub-id pub-id-type="doi">10.4258/hir.2019.25.1.1</pub-id>
          <pub-id pub-id-type="medline">30788175</pub-id>
          <pub-id pub-id-type="pmcid">PMC6372467</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Locke</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bashall</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Adely</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kitchen</surname>
              <given-names>GB</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing in medicine: a review</article-title>
          <source>Trends Anaesth Crit Care</source>
          <year>2021</year>
          <volume>38</volume>
          <fpage>4</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/j.tacc.2021.02.007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical information extraction applications: a literature review</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <volume>77</volume>
          <fpage>34</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30256-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.11.011</pub-id>
          <pub-id pub-id-type="medline">29162496</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30256-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5771858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nobel</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Puts</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bakers</surname>
              <given-names>FCH</given-names>
            </name>
            <name name-style="western">
              <surname>Robben</surname>
              <given-names>SGF</given-names>
            </name>
            <name name-style="western">
              <surname>Dekker</surname>
              <given-names>ALAJ</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing in Dutch free text radiology reports: challenges in a small language area staging pulmonary oncology</article-title>
          <source>J Digit Imaging</source>
          <year>2020</year>
          <month>08</month>
          <volume>33</volume>
          <issue>4</issue>
          <fpage>1002</fpage>
          <lpage>1008</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32076924"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10278-020-00327-z</pub-id>
          <pub-id pub-id-type="medline">32076924</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10278-020-00327-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC7522136</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Redjdal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Novikava</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kempf</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bouaud</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Seroussi</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Leveraging rule-based NLP to translate textual reports as structured inputs automatically processed by a clinical decision support system</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2024</year>
          <volume>316</volume>
          <fpage>1861</fpage>
          <lpage>1865</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI240794</pub-id>
          <pub-id pub-id-type="medline">39176854</pub-id>
          <pub-id pub-id-type="pii">SHTI240794</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>OpenAI</collab>
          </person-group>
          <article-title>GPT-4 technical report</article-title>
          <source>arXiv. Preprint posted online on March 15, 2023</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Touvron</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Albert</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Almahairi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>LLaMA 2: open foundation and fine-tuned chat models</article-title>
          <source>arXiv. Preprint posted online on July 19, 2023</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.2307.09288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Vaid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Menon</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Matteson</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Marin</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>GN</given-names>
            </name>
          </person-group>
          <article-title>Using large language models to automate data extraction from surgical pathology reports: retrospective cohort study</article-title>
          <source>JMIR Form Res</source>
          <year>2025</year>
          <volume>9</volume>
          <fpage>e64544</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e64544/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/64544</pub-id>
          <pub-id pub-id-type="medline">40194317</pub-id>
          <pub-id pub-id-type="pii">v9i1e64544</pub-id>
          <pub-id pub-id-type="pmcid">PMC11996145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Developing prompts from large language model for extracting clinical information from pathology and ultrasound reports in breast cancer</article-title>
          <source>Radiat Oncol J</source>
          <year>2023</year>
          <volume>41</volume>
          <issue>3</issue>
          <fpage>209</fpage>
          <lpage>216</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37793630"/>
          </comment>
          <pub-id pub-id-type="doi">10.3857/roj.2023.00633</pub-id>
          <pub-id pub-id-type="medline">37793630</pub-id>
          <pub-id pub-id-type="pii">roj.2023.00633</pub-id>
          <pub-id pub-id-type="pmcid">PMC10556835</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Zack</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Demirci</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sushil</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Miao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kasap</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Collisson</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Assessing large language models for oncology data inference from radiology reports</article-title>
          <source>JCO Clin Cancer Inform</source>
          <year>2024</year>
          <volume>8</volume>
          <fpage>e2400126</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://escholarship.org/uc/item/qt94q255pv"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/CCI.24.00126</pub-id>
          <pub-id pub-id-type="medline">39661914</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grothey</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Odenkirchen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brkic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schömig-Markiefka</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Quaas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Büttner</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tolkach</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Comprehensive testing of large language models for extraction of structured data in pathology</article-title>
          <source>Commun Med (Lond)</source>
          <year>2025</year>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>96</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s43856-025-00808-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s43856-025-00808-8</pub-id>
          <pub-id pub-id-type="medline">40164789</pub-id>
          <pub-id pub-id-type="pii">10.1038/s43856-025-00808-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC11958830</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Rong</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nezafati</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Treager</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Klesse</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Zhan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A critical assessment of using ChatGPT for extracting structured data from clinical notes</article-title>
          <source>NPJ Digit Med</source>
          <year>2024</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>106</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01079-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01079-8</pub-id>
          <pub-id pub-id-type="medline">38693429</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01079-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC11063058</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steyaert</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pizurica</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nagaraj</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Khandelwal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gentles</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gevaert</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Multimodal data fusion for cancer biomarker discovery with deep learning</article-title>
          <source>Nat Mach Intell</source>
          <year>2023</year>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>351</fpage>
          <lpage>362</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37693852"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s42256-023-00633-5</pub-id>
          <pub-id pub-id-type="medline">37693852</pub-id>
          <pub-id pub-id-type="pmcid">PMC10484010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Waqas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tripathi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ramachandran</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Rasool</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Multimodal data integration for oncology in the era of deep neural networks: a review</article-title>
          <source>Front Artif Intell</source>
          <year>2024</year>
          <volume>7</volume>
          <fpage>1408843</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/frai.2024.1408843"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/frai.2024.1408843</pub-id>
          <pub-id pub-id-type="medline">39118787</pub-id>
          <pub-id pub-id-type="pmcid">PMC11308435</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Purkayastha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>RY</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Vallières</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Habibollahi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gade</surname>
              <given-names>TP</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Soulen</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>HX</given-names>
            </name>
            <name name-style="western">
              <surname>Stavropoulos</surname>
              <given-names>SW</given-names>
            </name>
          </person-group>
          <article-title>Deep learning to distinguish benign from malignant renal lesions based on routine MR imaging</article-title>
          <source>Clin Cancer Res</source>
          <year>2020</year>
          <volume>26</volume>
          <issue>8</issue>
          <fpage>1944</fpage>
          <lpage>1952</lpage>
          <pub-id pub-id-type="doi">10.1158/1078-0432.CCR-19-0374</pub-id>
          <pub-id pub-id-type="medline">31937619</pub-id>
          <pub-id pub-id-type="pii">1078-0432.CCR-19-0374</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Terry</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Classification of benign and malignant renal tumors based on CT scans and clinical data using machine learning methods</article-title>
          <source>Informatics</source>
          <year>2023</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>55</fpage>
          <pub-id pub-id-type="doi">10.3390/informatics10030055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Bellolio</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Medrano-Gracia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Werys</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mahajan</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Use of natural language processing to improve predictive models for imaging utilization in children presenting to the emergency department</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>287</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-1006-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-1006-6</pub-id>
          <pub-id pub-id-type="medline">31888609</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-1006-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6937987</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Combining structured and unstructured data for predictive models: a deep learning approach</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>280</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-01297-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-01297-6</pub-id>
          <pub-id pub-id-type="medline">33121479</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-01297-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7596962</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Abell-Hart</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A multimodal transformer: fusing clinical notes with structured EHR data for interpretable in-hospital mortality prediction</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2022</year>
          <volume>2022</volume>
          <fpage>719</fpage>
          <lpage>728</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37128451"/>
          </comment>
          <pub-id pub-id-type="medline">37128451</pub-id>
          <pub-id pub-id-type="pii">917</pub-id>
          <pub-id pub-id-type="pmcid">PMC10148371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Ritchie</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Basford</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Pulley</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Bastarache</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brown-Gentry</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Masys</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Roden</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Crawford</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>PheWAS: demonstrating the feasibility of a phenome-wide scan to discover gene-disease associations</article-title>
          <source>Bioinformatics</source>
          <year>2010</year>
          <volume>26</volume>
          <issue>9</issue>
          <fpage>1205</fpage>
          <lpage>1210</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20335276"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btq126</pub-id>
          <pub-id pub-id-type="medline">20335276</pub-id>
          <pub-id pub-id-type="pii">btq126</pub-id>
          <pub-id pub-id-type="pmcid">PMC2859132</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>ATC/DDD toolkit: anatomical therapeutic chemical classification</article-title>
          <source>WHO Collaborating Centre for Drug Statistics Methodology</source>
          <year>2025</year>
          <access-date>2025-08-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/tools/atc-ddd-toolkit">https://www.who.int/tools/atc-ddd-toolkit</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Qwen2.5 technical report</article-title>
          <source>arXiv. Preprint posted online on January 3, 2025</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.2412.15115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dubey</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jauhri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pandey</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kadian</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The Llama 3 herd of models</article-title>
          <source>arXiv. Preprint posted online on July 31, 2024</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.2407.21783</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Papineni</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Roukos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>BLEU: a method for automatic evaluation of machine translation</article-title>
          <year>2002</year>
          <conf-name>Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>2002 July 7-12</conf-date>
          <conf-loc>Philadelphia, Pennsylvania</conf-loc>
          <fpage>311</fpage>
          <lpage>318</lpage>
          <pub-id pub-id-type="doi">10.3115/1073083.1073135</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Regression shrinkage and selection via the lasso</article-title>
          <source>J R Stat Soc Series B Stat Methodol</source>
          <year>1996</year>
          <volume>58</volume>
          <issue>1</issue>
          <fpage>267</fpage>
          <lpage>288</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>The regression analysis of binary sequences</article-title>
          <source>J R Stat Soc Series B Stat Methodol</source>
          <year>1958</year>
          <volume>20</volume>
          <issue>2</issue>
          <fpage>215</fpage>
          <lpage>232</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1958.tb00292.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rumelhart</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Learning representations by back-propagating errors</article-title>
          <source>Nature</source>
          <year>1986</year>
          <volume>323</volume>
          <issue>6088</issue>
          <fpage>533</fpage>
          <lpage>536</lpage>
          <pub-id pub-id-type="doi">10.1038/323533a0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1023/A:1010933404324"/>
          </comment>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Mach Learn</source>
          <year>1995</year>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>273</fpage>
          <lpage>297</lpage>
          <pub-id pub-id-type="doi">10.1007/BF00994018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: a scalable tree boosting system</article-title>
          <year>2016</year>
          <conf-name>KDD '16: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>2016 August 13-17</conf-date>
          <conf-loc>San Francisco, California, USA</conf-loc>
          <fpage>785</fpage>
          <lpage>794</lpage>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McAuley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>EY</given-names>
            </name>
            <name name-style="western">
              <surname>Gentili</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>CN</given-names>
            </name>
          </person-group>
          <article-title>RadBERT: adapting transformer-based language models to radiology</article-title>
          <source>Radiol Artif Intell</source>
          <year>2022</year>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e210258</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35923376"/>
          </comment>
          <pub-id pub-id-type="doi">10.1148/ryai.210258</pub-id>
          <pub-id pub-id-type="medline">35923376</pub-id>
          <pub-id pub-id-type="pmcid">PMC9344353</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Altosaar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganath</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ClinicalBERT: modeling clinical notes and predicting hospital readmission</article-title>
          <source>arXiv. Preprint posted online on April 10, 2019</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.1904.05342</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>1240</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tinn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Usuyama</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Domain-specific language model pretraining for biomedical natural language processing</article-title>
          <source>arXiv. Preprint posted online on July 31, 2020</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.2007.15779</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Bosniak</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Krinsky</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Waisman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ambrosino</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Renal cell carcinoma: correlation of CT findings with nuclear morphologic grading in 100 tumors</article-title>
          <source>Abdom Imaging</source>
          <year>1994</year>
          <volume>19</volume>
          <issue>3</issue>
          <fpage>262</fpage>
          <lpage>266</lpage>
          <pub-id pub-id-type="doi">10.1007/BF00203523</pub-id>
          <pub-id pub-id-type="medline">8019359</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alcala</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mariosa</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Smith-Byrne</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nasrollahzadeh Nesheli</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Carreras-Torres</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ardanaz Aicua</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bondonno</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Bonet</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Brunström</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bueno-de-Mesquita</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chirlaque</surname>
              <given-names>M-D</given-names>
            </name>
            <name name-style="western">
              <surname>Christakoudi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Heath</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Kaaks</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Katzke</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Krogh</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ljungberg</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>May</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Melander</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Palli</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez-Barranco</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sacerdote</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Stocks</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tjønneland</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Travis</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Vermeulen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chanock</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Purdue</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weiderpass</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Muller</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brennan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Johansson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The relationship between blood pressure and risk of renal cell carcinoma</article-title>
          <source>Int J Epidemiol</source>
          <year>2022</year>
          <volume>51</volume>
          <issue>4</issue>
          <fpage>1317</fpage>
          <lpage>1327</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/10668/19800"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ije/dyac042</pub-id>
          <pub-id pub-id-type="medline">35312764</pub-id>
          <pub-id pub-id-type="pii">6551537</pub-id>
          <pub-id pub-id-type="pmcid">PMC9365619</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The association between BMI and kidney cancer risk: an updated dose-response meta-analysis in accordance with PRISMA guideline</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2018</year>
          <volume>97</volume>
          <issue>44</issue>
          <fpage>e12860</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30383638"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000012860</pub-id>
          <pub-id pub-id-type="medline">30383638</pub-id>
          <pub-id pub-id-type="pii">00005792-201811020-00016</pub-id>
          <pub-id pub-id-type="pmcid">PMC6221676</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Uzzo</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Allaf</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Bass</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Cadeddu</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Derweesh</surname>
              <given-names>IH</given-names>
            </name>
            <name name-style="western">
              <surname>Giambarresi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gervais</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Lane</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Leibovich</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Pierorazio</surname>
              <given-names>PM</given-names>
            </name>
          </person-group>
          <article-title>Renal mass and localized renal cancer: AUA guideline</article-title>
          <source>J Urol</source>
          <year>2017</year>
          <volume>198</volume>
          <issue>3</issue>
          <fpage>520</fpage>
          <lpage>529</lpage>
          <pub-id pub-id-type="doi">10.1016/j.juro.2017.04.100</pub-id>
          <pub-id pub-id-type="medline">28479239</pub-id>
          <pub-id pub-id-type="pii">S0022-5347(17)59870-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ertekin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Amasyalı</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Erol</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Acikgozoglu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kucukdurmaz</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Nayman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Erol</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Role of contrast enhancement and corrected attenuation values of renal tumors in predicting renal cell carcinoma (RCC) subtypes: protocol for a triphasic multi-slice computed tomography (CT) procedure</article-title>
          <source>Pol J Radiol</source>
          <year>2017</year>
          <volume>82</volume>
          <fpage>384</fpage>
          <lpage>391</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.12659/PJR.901957"/>
          </comment>
          <pub-id pub-id-type="doi">10.12659/PJR.901957</pub-id>
          <pub-id pub-id-type="medline">28811845</pub-id>
          <pub-id pub-id-type="pii">901957</pub-id>
          <pub-id pub-id-type="pmcid">PMC5530140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Buuren</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Groothuis-Oudshoorn</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>MICE: multivariate imputation by chained equations in R</article-title>
          <source>J Stat Softw</source>
          <year>2011</year>
          <volume>45</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <lpage>67</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v045.i03</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
