<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e65596</article-id><article-id pub-id-type="doi">10.2196/65596</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Effects of Image Degradation on Deep Neural Network Classification of Scaphoid Fracture Radiographs: Comparison Study of Different Noise Types</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Lin</surname><given-names>Chihung</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Yoon</surname><given-names>Alfred P</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Chien-Wei</given-names></name><degrees>MD</degrees><xref ref-type="aff" 
rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chao</surname><given-names>Tung</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Chung</surname><given-names>Kevin C</given-names></name><degrees>MS, MD</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Kuo</surname><given-names>Chang-Fu</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref><xref ref-type="aff" rid="aff7">7</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Center for Artificial Intelligence in Medicine, Chang Gung Memorial Hospital</institution><addr-line>Taoyuan</addr-line><country>Taiwan</country></aff><aff id="aff2"><institution>Department of Artificial Intelligence, Chang Gung University</institution><addr-line>Taoyuan</addr-line><country>Taiwan</country></aff><aff id="aff3"><institution>Division of Plastic and Reconstructive Surgery, Section of Hand and Microvascular Surgery, University of California Davis Medical Center</institution><addr-line>Sacramento</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff4"><institution>Comprehensive Hand Center, Michigan Medicine</institution><addr-line>Ann Arbor</addr-line><addr-line>MI</addr-line><country>United States</country></aff><aff id="aff5"><institution>Division of Rheumatology, Allergy and Immunology, Center for Artificial Intelligence in Medicine, Chang Gung Memorial Hospital</institution><addr-line>No.5, Fuxing Street, Guishan District, Taoyuan City 
333</addr-line><addr-line>Taoyuan</addr-line><country>Taiwan</country></aff><aff id="aff6"><institution>College of Medicine, Chang Gung University</institution><addr-line>Taoyuan</addr-line><country>Taiwan</country></aff><aff id="aff7"><institution>Division of Rheumatology, Orthopaedics and Dermatology, School of Medicine, University of Nottingham</institution><addr-line>Nottingham</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Marginean</surname><given-names>Anca-Nicoleta</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Panboonyuen</surname><given-names>Teerapong</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Chang-Fu Kuo, MD, PhD, Division of Rheumatology, Allergy and Immunology, Center for Artificial Intelligence in Medicine, Chang Gung Memorial Hospital, No.5, Fuxing Street, Guishan District, Taoyuan City 333, Taoyuan, 333, Taiwan, 886 3328-1200; <email>zandis@gmail.com</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>22</day><month>1</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e65596</elocation-id><history><date date-type="received"><day>20</day><month>08</month><year>2024</year></date><date date-type="rev-recd"><day>07</day><month>12</month><year>2025</year></date><date date-type="accepted"><day>08</day><month>12</month><year>2025</year></date></history><copyright-statement>&#x00A9; Chihung Lin, Alfred P Yoon, Chien-Wei Wang, Tung Chao, Kevin C Chung, Chang-Fu Kuo. 
Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 22.1.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e65596"/><abstract><sec><title>Background</title><p>Deep learning models have shown strong potential for automated fracture detection in medical images. However, their robustness under varying image quality remains uncertain, particularly for small and subtle fractures, such as scaphoid fractures. Understanding how different types of image perturbations affect model performance is crucial for ensuring reliable deployment in clinical practice.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the robustness of a deep learning model trained to detect scaphoid fractures in radiographs when exposed to various image perturbations. 
We sought to identify which perturbations most strongly impact performance and to explore strategies to mitigate performance degradation.</p></sec><sec sec-type="methods"><title>Methods</title><p>Radiographic datasets were systematically modified by applying Gaussian noise, blurring, JPEG compression, contrast-limited adaptive histogram equalization, resizing, and geometric offsets. Model accuracy was evaluated across different perturbation types and levels. Image quality was quantified using peak signal-to-noise ratio and structural similarity index measure to assess correlations between degradation and model performance.</p></sec><sec sec-type="results"><title>Results</title><p>Model accuracy declined with increasing perturbation severity, but the extent varied across perturbation types. Gaussian blur caused the most substantial performance drop, whereas contrast-limited adaptive histogram equalization increased the false-negative rate. The model demonstrated higher resilience to color perturbations than to grayscale degradations. A strong linear correlation was found between peak signal-to-noise ratio&#x2013;structural similarity index measure and accuracy, suggesting that better image quality led to improved detection. Geometric offsets and pixel value rescaling had minimal influence, whereas resolution was the dominant factor affecting performance.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The findings indicate that image quality, especially resolution and blurring, substantially influences the robustness of deep learning&#x2013;based fracture detection models. Ensuring adequate image resolution and quality control can enhance diagnostic reliability. 
These results provide valuable insights for designing more accurate and resilient medical imaging models under real-world variability.</p></sec></abstract><kwd-group><kwd>scaphoid fractures</kwd><kwd>contrast-limited adaptive histogram equalization</kwd><kwd>CLAHE</kwd><kwd>image quality</kwd><kwd>peak signal-to-noise ratio</kwd><kwd>PSNR</kwd><kwd>structural similarity index measure</kwd><kwd>SSIM</kwd><kwd>neural network</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Significant advances in computer vision have led to remarkable improvements in deep neural network (DNN) performance on tasks such as medical image classification [<xref ref-type="bibr" rid="ref1">1</xref>]. Despite these achievements, DNN-based systems exhibit limited robustness compared with human perception, especially in the context of image perturbations and corruption [<xref ref-type="bibr" rid="ref2">2</xref>]. Adversarial attacks can cause substantial misclassifications, and even minor noise can adversely affect performance [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Nevertheless, state-of-the-art classifiers appear to exhibit some ability to overcome random noise [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>In real-world medical imaging, DNN model robustness is critical because low-quality images and noise are not uncommon; these issues include radiographic underexposures and motion artifacts as well as downstream processing or transfer distortions that can alter image fidelity and affect artificial intelligence (AI) performance [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. 
In addition, real-world clinical workflows may involve low-resolution screen recapturing or smartphone-captured radiographs in urgent or resource-limited settings, which introduce compounded degradations, such as resizing, compression, and display-related artifacts [<xref ref-type="bibr" rid="ref9">9</xref>]. Our previous work on a DNN model that detected both visible and occult scaphoid fractures demonstrated that it was possible to reliably detect fractures of small bones and to assist in the radiographic detection of occult fractures that are not visible to human observers [<xref ref-type="bibr" rid="ref10">10</xref>]. This experience indicated that image preprocessing, file formatting, and data storage could negatively impact model performance. We discovered that image processing techniques introduced noise into input files, potentially misleading the DNN model. Such noise, often imperceptible to the human eye, can significantly affect model performance and potentially lead to incorrect diagnoses [<xref ref-type="bibr" rid="ref11">11</xref>]. Consequently, evaluation of performance is vital in convolutional neural network (CNN)&#x2013;based models that process noisy images; this evaluation poses frequent challenges in clinical settings. Prior studies have revealed that neural networks are most accurate when the data to be classified exhibit quality similar to that of the model training data; it was recommended that noise be injected into the training data to increase model robustness [<xref ref-type="bibr" rid="ref12">12</xref>]. However, the addition of many possible noises to a training dataset is both computationally expensive and impractical. 
To the best of our knowledge, no study has investigated the impacts of different types of noise on neural networks designed for fracture classification when radiographs serve as inputs.</p><p>This study investigated the robustness of our CNN-based scaphoid fracture classification model when various types of image noise were present. To comprehensively evaluate model performance, we simulated real-world conditions that yield low-quality clinical images. Using fine-tuning techniques to create noisy samples, we sought to enhance model performance under noisy or degraded conditions, thereby mitigating the adverse effects of image corruption [<xref ref-type="bibr" rid="ref7">7</xref>]. Additionally, we examined the impacts of specific forms of image degradation on model performance; we explored particular model vulnerabilities that might warrant further refinement. Our goals were to ascertain model accuracy when processing low-quality clinical images and to identify the types of noise that most confounded the model, thereby providing valuable insights to aid the development of more resilient DNN-based medical image classifiers appropriate for real-world applications.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>The study protocol was approved by the institutional review board of Chang Gung Memorial Hospital (202202256B0).</p></sec><sec id="s2-2"><title>Scaphoid Fracture Classification Model</title><p>As previously described in our earlier work [<xref ref-type="bibr" rid="ref10">10</xref>], the scaphoid fracture classification model was built using an EfficientNetB1 [<xref ref-type="bibr" rid="ref13">13</xref>] backbone and 240&#x00D7;240-pixel red, green, and blue (RGB) images with a classification threshold of 0.5. 
In a study by Yoon et al [<xref ref-type="bibr" rid="ref10">10</xref>], the model was originally trained and validated using 3991 scaphoid fracture radiographs and 5542 normal scaphoid radiographs.</p><p>In this study, we adopted the same model architecture as in the study by Yoon et al [<xref ref-type="bibr" rid="ref10">10</xref>] and initialized the network with the finalized pretrained weights from that study, as shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>. We then further fine-tuned this model using the 5286 training radiographs from the dataset described in the &#x201C;Dataset and Preprocessing&#x201D; section and evaluated it on an independent test set. Fine-tuning was performed using the AdamW optimizer, with appropriate adjustments to the learning rate, weight decay, and batch size. The learning rate was reduced if the validation loss failed to improve over 6 epochs, and training was stopped early when model performance did not increase further after 15 epochs.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Complete pipeline for inference of a scaphoid fracture, including fracture (red box) and occult fracture. This report solely focuses on the performance of the fracture model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e65596_fig01.png"/></fig></sec><sec id="s2-3"><title>Dataset and Preprocessing</title><p>The scaphoid fracture classification model used in this study was fine-tuned and evaluated using 5954 radiographs stored in Chang Gung Memorial Hospital in Taiwan. Of these radiographs, 1483 were of fractured scaphoids, and the remaining 4471 were of normal scaphoids. Images were captured by commercially available x-ray machines from multiple manufacturers. All radiographs were reviewed by 3 experienced radiologists and labeled as <italic>fractured</italic> or <italic>normal</italic>. 
This dataset is distinct from the dataset used in our previous work [<xref ref-type="bibr" rid="ref10">10</xref>], in which the model was originally developed using 3991 fracture and 5542 normal radiographs. In this study, we adopted the same model architecture as in the study by Yoon et al [<xref ref-type="bibr" rid="ref10">10</xref>], initialized the network with the finalized pretrained weights from that study, and further fine-tuned the model using the 5286 training images described below before evaluating it on an independent test set.</p><p>The training dataset consisted of 5286 images, and the test dataset comprised 668 images. A total of 28 images were excluded for the following reasons: anatomical anomalies secondary to arthritis (12 images), unclear laterality (3 images), wrong-sided imaging (2 images), image artifacts introduced by the hardware (1 image), and imaging findings of likely previous fractures or chronic nonunion (10 images). Thus, the final test dataset contained 640 images. The images varied in size and position, but all were 12-bit grayscale images of posteroanterior views of the wrist. Most images were rectangular, with widths ranging from 1000 to 1600 pixels, and heights ranging from 1600 to 2200 pixels. If the photometric interpretation of an image was MONOCHROME1, the image was converted to MONOCHROME2. The scaphoid was isolated from each hand radiograph using a bounding box generated by a separate scaphoid detection model. This detector (not to be confused with the scaphoid fracture classifier) was not the focus of this study because its performance is robust against common perturbations.</p></sec><sec id="s2-4"><title>Rescaling Images to 8-Bit Depth</title><p>All radiographs were 12-bit grayscale posteroanterior wrist views that varied in terms of size and position, with widths between 1000 and 1600 pixels, and heights between 1600 and 2200 pixels. 
The images were rescaled to 8-bit grayscale and compiled into RGB images using the value of interest (VOI) lookup table and windowing operations that were also used during model training.</p><p>The DICOM images were stored in 12-bit grayscale, but the model accepted only 24-bit RGB (color) images. Thus, the images were first rescaled to 8-bit grayscale (with pixel values ranging from 0 to 255) and then compiled into RGB images for interpretation. This rescaling process was based on the maximum and minimum values of the image, rather than the actual bit depth. A VOI lookup table and windowing operations were applied to all images to adjust the pixel values based on the DICOM VOI LUT and windowing tags (pydicom.pixel_data_handlers.apply_voi_lut, version 1.4, 2022).</p><p>Differences emerge when images do not contain the lowest and highest pixel values. For 12-bit grayscale images, these pixel values are 0 and 4095, respectively. The resulting difference can brighten or darken the entire image. Furthermore, the use of a VOI lookup process may change the brightness and contrast. These differences are minor and usually imperceptible to the human eye.</p></sec><sec id="s2-5"><title>Generating Datasets for Noise Testing</title><sec id="s2-5-1"><title>Overview</title><p>To assess model performance in the presence of distortions, we generated several low-quality datasets using different noise perturbations. First, we read the test images using method 1. Images were cropped to the scaphoid regions demarcated by the bounding boxes, resized to 240&#x00D7;240 pixels, converted to 8-bit grayscale, and saved in PNG format. 
We refer to these baseline images (without noise) as &#x201C;clean&#x201D; hereafter.</p><p>To systematically evaluate the robustness of our scaphoid fracture classification model, we created multiple &#x201C;noisy&#x201D; or &#x201C;degraded&#x201D; datasets by applying a variety of image perturbations to our <italic>clean baseline test set </italic>of 640 scaphoid radiographs. Each perturbation type and severity level corresponds to a row in <xref ref-type="table" rid="table1">Table 1</xref>, which lists the name of the treatment, the number of images, and the specific operation performed. Next, we describe the main categories of perturbations and their implementations.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Information on all datasets, including the datasets from which they were modified, sample counts, and methods of modification.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Treatment name</td><td align="left" valign="bottom">Modified from</td><td align="left" valign="bottom">N</td><td align="left" valign="bottom">Operation</td></tr></thead><tbody><tr><td align="left" valign="top">Clean dataset</td><td align="left" valign="top">Original DICOM file</td><td align="left" valign="top">640</td><td align="left" valign="top">Read original DICOM file with (1)</td></tr><tr><td align="left" valign="top">Gaussian blur_0.5</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply Gaussian blur (&#x03C3;=0.5) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian blur_1.0</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply Gaussian blur (&#x03C3;=1.0) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian blur_1.5</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td 
align="left" valign="top">Apply Gaussian blur (&#x03C3;=1.5) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian blur_2.0</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply Gaussian blur (&#x03C3;=2.0) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian blur_2.5</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply Gaussian blur (&#x03C3;=2.5) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian blur_3.0</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply Gaussian blur (&#x03C3;=3.0) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_1</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Add RGB Gaussian noise with SD 1.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_2</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Add RGB Gaussian noise with SD 2.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_3</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Add RGB Gaussian noise with SD 3.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_4</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Add RGB Gaussian noise with SD 4.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_5</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Add RGB Gaussian noise with SD 5.00 to the cropped 
scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_6</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Add RGB Gaussian noise with SD 6.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_1</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Convert image to grayscale, add Gaussian noise with SD 1.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_2</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Convert image to grayscale, add Gaussian noise with SD 2.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_3</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Convert image to grayscale, add Gaussian noise with SD 3.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_4</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Convert image to grayscale, add Gaussian noise with SD 4.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_5</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Convert image to grayscale, add Gaussian noise with SD 5.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_6.0</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Convert image to grayscale, add Gaussian noise with SD 6.00 to the cropped scaphoid</td></tr><tr><td align="left" valign="top">JPEG compression_10</td><td align="left" valign="top">Scaphoid</td><td 
align="left" valign="top">640</td><td align="left" valign="top">Degrade scaphoid images with compression strength=10 (slightest)</td></tr><tr><td align="left" valign="top">JPEG compression_30</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Degrade scaphoid images with compression strength=30</td></tr><tr><td align="left" valign="top">JPEG compression_50</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Degrade scaphoid images with compression strength=50</td></tr><tr><td align="left" valign="top">JPEG compression_70</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Degrade scaphoid images with compression strength=70</td></tr><tr><td align="left" valign="top">JPEG compression_90</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Degrade scaphoid images with compression strength=90 (strongest)</td></tr><tr><td align="left" valign="top">CLAHE<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>_1</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply CLAHE (clip limit=1.0, tile grid size=[8, 8]) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">CLAHE_2</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply CLAHE (clip limit=2.0, tile grid size=[8, 8]) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">CLAHE_3</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply CLAHE (clip limit=3.0, tile grid size=[8, 8]) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">CLAHE_4</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td 
align="left" valign="top">Apply CLAHE (clip limit=4.0, tile grid size=[8, 8]) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">CLAHE_5</td><td align="left" valign="top">Scaphoid</td><td align="left" valign="top">640</td><td align="left" valign="top">Apply CLAHE (clip limit=5.0, tile grid size=[8, 8]) to the cropped scaphoid</td></tr><tr><td align="left" valign="top">Resize_400</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">640</td><td align="left" valign="top">Resize whole images by width=400 while keeping the aspect ratio and crop out the scaphoid by projecting the recorded bounding box coordinates</td></tr><tr><td align="left" valign="top">Resize_600</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">640</td><td align="left" valign="top">Resize whole images by width=600 while keeping the aspect ratio and crop out the scaphoid by projecting the recorded bounding box coordinates</td></tr><tr><td align="left" valign="top">Resize_800</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">640</td><td align="left" valign="top">Resize whole images by width=800 while keeping the aspect ratio and crop out the scaphoid by projecting the recorded bounding box coordinates</td></tr><tr><td align="left" valign="top">Resize_1000</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">640</td><td align="left" valign="top">Resize whole images by width=1000 while keeping the aspect ratio and crop out the scaphoid by projecting the recorded bounding box coordinates</td></tr><tr><td align="left" valign="top">Resize_1200</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">640</td><td align="left" valign="top">Resize the whole images by width=1200 while keeping the aspect ratio and crop out the scaphoid by projecting the recorded bounding box coordinates</td></tr><tr><td align="left" valign="top">Resize_1400</td><td align="left" 
valign="top">Whole image</td><td align="left" valign="top">640</td><td align="left" valign="top">Resize whole images by width=1400 while keeping the aspect ratio and crop out the scaphoid by projecting the recorded bounding box coordinates</td></tr><tr><td align="left" valign="top">Geometrics_1</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">5120</td><td align="left" valign="top">Modify the center point of bounding box (bbox) x,y coordinates by &#x2212;20% and 0% and 20% of bbox length</td></tr><tr><td align="left" valign="top">Geometrics_2</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">5120</td><td align="left" valign="top">Modify the center point of bbox x,y coordinates by &#x2212;10% and 0% and 10% of bbox length</td></tr><tr><td align="left" valign="top">Geometrics_3</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">10,240</td><td align="left" valign="top">Modify the 4 bbox x,y coordinates (xmin, xmax, ymin, ymax) by &#x2212;10% and 10% of bbox length</td></tr><tr><td align="left" valign="top">Geometrics_4</td><td align="left" valign="top">Whole image</td><td align="left" valign="top">10,240</td><td align="left" valign="top">Modify the 4 bbox x,y coordinates (xmin, xmax, ymin, ymax) by &#x2212;5% and 5% of bbox length</td></tr><tr><td align="left" valign="top">12-bit-rescale_1</td><td align="left" valign="top">Original DICOM file</td><td align="left" valign="top">640</td><td align="left" valign="top">Read original DICOM file using method (2) and crop the scaphoid with the recorded bbox coordinates</td></tr><tr><td align="left" valign="top">12-bit-rescale_2</td><td align="left" valign="top">Original DICOM file</td><td align="left" valign="top">640</td><td align="left" valign="top">Read original DICOM file using method (2) and crop the scaphoid with the detector</td></tr><tr><td align="left" valign="top">Screenshot_MicroDicom_1</td><td align="left" valign="top">Original 
DICOM file</td><td align="left" valign="top">640</td><td align="left" valign="top">Read DICOM files using MicroDicom with the default settings, screenshot the whole image at a resolution of 550&#x00D7;780, and crop the scaphoid with the detector</td></tr><tr><td align="left" valign="top">Screenshot_MicroDicom_2</td><td align="left" valign="top">Original DICOM file</td><td align="left" valign="top">640</td><td align="left" valign="top">Read DICOM files using MicroDicom with the default settings, enter fullscreen mode, screenshot the whole image at a resolution of 900&#x00D7;1050 and crop the scaphoid with the detector</td></tr><tr><td align="left" valign="top">Screenshot_ImageJ</td><td align="left" valign="top">Original DICOM file</td><td align="left" valign="top">640</td><td align="left" valign="top">Read DICOM files using ImageJ, adjust contrast to 12-bit, screenshot the whole image at a resolution width of 600 while retaining the height-width ratio, and crop the scaphoid with the detector</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>CLAHE: contrast-limited adaptive histogram equalization.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-5-2"><title>Clean Dataset</title><p>We refer to the original 640 cropped scaphoid images (taken directly from DICOM files and converted to 8-bit depth) as the &#x201C;clean&#x201D; dataset (row 1 in <xref ref-type="table" rid="table1">Table 1</xref>). This set serves as our baseline for comparison.</p></sec><sec id="s2-5-3"><title>Gaussian Blur</title><p>We simulated <italic>blur</italic> by convolving each scaphoid region with a Gaussian kernel using SDs (&#x03C3;) ranging from 0.5 to 3.0 (rows 2-7 in <xref ref-type="table" rid="table1">Table 1</xref>). Specifically, we used a Python image-augmentation library (eg, <italic>imgaug</italic>; Python Software Foundation) to apply GaussianBlur(&#x03C3;=x). 
Each &#x03C3; setting generated a separate dataset of 640 images.</p></sec><sec id="s2-5-4"><title>Gaussian Noise</title><p>For grayscale noise (rows 8-13 in <xref ref-type="table" rid="table1">Table 1</xref>), we converted each cropped scaphoid image to 8-bit grayscale, then added random Gaussian noise with SD values in the set {1, 2, 3, 4, 5, 6}. The resulting noised images were reconverted to RGB (by replicating the grayscale channel 3 times) to match the model&#x2019;s 3-channel input requirement.</p><p>For color (RGB) noise (rows 14-19 in <xref ref-type="table" rid="table1">Table 1</xref>), we similarly added Gaussian noise to the 3 RGB channels, resulting in &#x201C;colored&#x201D; noise. SD values were identical (1-6). Each level produced 640 modified images.</p></sec><sec id="s2-5-5"><title>JPEG Compression</title><p>We degraded the quality of the cropped scaphoid images by JPEG compression (rows 20-24 in <xref ref-type="table" rid="table1">Table 1</xref>), with a compression strength of 10, 30, 50, 70, and 90 (higher values indicating more severe compression in our chosen library). Each compression level formed a dataset of 640 images.</p><p>This process simulates the impact of lossy image storage on fracture detection performance.</p></sec><sec id="s2-5-6"><title>Contrast-Limited Adaptive Histogram Equalization</title><p>Contrast-limited adaptive histogram equalization (CLAHE; rows 25-29 in <xref ref-type="table" rid="table1">Table 1</xref>) enhances local contrast in radiographs, potentially exaggerating edges and intensifying noise. We generated 5 datasets by applying OpenCV&#x2019;s createCLAHE() with <italic>cliplimit</italic> in {1.0, 2.0, 3.0, 4.0, 5.0} and tileGridSize=(8,8). 
Each CLAHE setting yielded 640 images.</p></sec><sec id="s2-5-7"><title>Resizing Whole Images</title><p>Before cropping the scaphoid region, we resized the <italic>entire</italic> original wrist radiograph (rows 30-35 in <xref ref-type="table" rid="table1">Table 1</xref>) to widths of 400, 600, 800, 1000, 1200, or 1400 pixels (preserving the aspect ratio), then reapplied our scaphoid bounding box coordinates to crop out the scaphoid. Finally, the cropped regions were resized to 240&#x00D7;240 pixels for model input. This procedure mimics variations in image resolution and scaling during clinical acquisition or display.</p></sec><sec id="s2-5-8"><title>Geometric Offsets</title><p>To examine the robustness of our model to bounding box inaccuracies, we systematically shifted or distorted the bounding box coordinates by &#x00B1;5%, &#x00B1;10%, or &#x00B1;20% of the bounding box size. This sometimes resulted in only partial scaphoid capture. Four separate datasets (Geometrics_1 to Geometrics_4) covered different offset ranges (rows 36-39 in <xref ref-type="table" rid="table1">Table 1</xref>). Each row includes multiple transformations, so the total number of images can exceed 640.</p></sec><sec id="s2-5-9"><title>Twelve-bit Rescaling</title><p>Instead of converting the DICOM images with our typical method (method 1), we used an alternative approach (method 2) that directly scales 12-bit raw pixel values (0-4095) into 8-bit (0-255). 
We generated 2 variations: (1) 12-bit-rescale_1: recorded bounding box coordinates were applied to these rescaled images; (2) 12-bit-rescale_2: we reran the scaphoid detection model on these rescaled images to obtain new bounding boxes.</p></sec><sec id="s2-5-10"><title>Screenshot Datasets</title><p>We opened the original DICOM files in different DICOM viewing software (eg, MicroDicom; ImageJ developed by National Institutes of Health and the Laboratory for Optical and Computational Instrumentation), adjusted the default display or resolution, and took screenshots of the entire wrist X-ray. Screen resolutions varied (eg, 550&#x00D7;780 or 900&#x00D7;1050; rows 42-44 in <xref ref-type="table" rid="table1">Table 1</xref>). We then cropped out the scaphoid region using our detection model, resizing the final images to 240&#x00D7;240. Each screenshot setting introduced different display parameters, simulating suboptimal clinical scenarios where images may be shared or interpreted via screenshots instead of original DICOM files.</p><p>Each of these modifications produced a new dataset of 640 images (except when multiple bounding box transformations were applied, resulting in a large number of images). Collectively, these datasets allowed us to evaluate the effects of image degradations on the classification model&#x2019;s performance. <xref ref-type="table" rid="table1">Table 1</xref> provides a concise summary of all transformations, whereas <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> contains more detailed code snippets and pseudocode for each operation.</p><p>Additionally, 5 datasets were produced using the rescale method or by capturing screenshots from the original DICOM files on image viewers. <xref ref-type="fig" rid="figure2">Figure 2</xref> and <xref ref-type="table" rid="table1">Table 1</xref> detail all 44 perturbations. 
The details of noise introduction into the dataset are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Flowchart of the pipeline used to generate clean and noisy datasets. CLAHE: contrast-limited adaptive histogram equalization.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e65596_fig02.png"/></fig></sec></sec><sec id="s2-6"><title>Image Quality Assessment</title><p>The peak signal-to-noise ratio (PSNR) and the structural similarity index measure (SSIM) [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>] were used as image quality assessment methods when gauging alterations in images. PSNR is a widely used metric that assesses the fidelity of an image compared with its original or uncompressed version. PSNR quantifies the difference between 2 images by calculating the ratio of the maximum possible power of the signal (the image) to the power of the noise (the error that is introduced). A higher PSNR value indicates better image quality because less noise is introduced during changes. In contrast, SSIM is a more advanced metric for measuring image quality that considers structural information, luminance, and contrast when comparing 2 images. SSIM calculates local similarities between the 2 images and combines them into a single score, ranging from &#x2212;1 to 1, where a higher value indicates greater similarity between the images. Both PSNR and SSIM values were calculated using TensorFlow image module (TensorFlow Module: tf.image), which allows efficient and accurate computations of both metrics. However, neither PSNR nor SSIM can be used for geometric transformations, such as affine and rotational adjustments, because both rely on pixel-by-pixel comparisons. 
Such comparisons become less meaningful when the spatial arrangement of image content is altered via geometric transformations. Consequently, datasets with modifications to the labeled scaphoid regions were excluded from comparison to maintain assessment integrity.</p><p>We calculated and compared PSNR and SSIM values for the clean and noisy images. Importantly, neither assessment is amenable to geometric transformations, such as affine and rotational adjustments. Therefore, the following datasets with modifications to the labeled scaphoid regions were excluded from the PSNR and SSIM comparisons: the 4 geometric datasets, the 12-bit rescale_2 datasets, and the 3 screenshot datasets. Both PSNR and SSIM were calculated using TensorFlow image functions (PSNR and SSIM).</p></sec><sec id="s2-7"><title>Model Evaluation and Performance Metrics</title><p>The scaphoid fracture classification model was evaluated by analyzing its performance on various test datasets that included clean images and distorted images with diverse noise levels. Performance metrics, including model accuracy, sensitivity, specificity, and <italic>F</italic><sub>1</sub>-score, were calculated by comparing the model predictions with ground truth labels provided by experienced orthopedic and hand surgeons.</p></sec><sec id="s2-8"><title>Robustness Against Image Quality Degradation</title><p>To assess model robustness against image quality degradation, model performances on distorted images were compared with model performances on clean images. This comparison sought to estimate fracture classification accuracy when perturbations were present, offering insights into potential real-world applications of the model and its robustness when image quality varies in clinical settings.</p></sec><sec id="s2-9"><title>The Environment</title><p>Model inferences were executed on Linux Ubuntu 18.04 LTS (GPU: NVIDIA GeForce RTX 3080 (10 GB); Python version 3.7.13). 
Implementation was conducted with a TensorFlow backend and TensorFlow version 2.9.1.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Performance of the Deep Learning Model With Varied Perturbations</title><p>We investigated the efficacy of a deep learning model designed to detect scaphoid fractures in radiographs, specifically when image quality had been deliberately compromised by adding perturbations. The primary datasets were altered using various methods: addition of Gaussian noise, blurring, JPEG compression, CLAHE, resizing, and geometric adjustments. <xref ref-type="table" rid="table2">Table 2</xref> presents the quality assessment and model performance results across the evaluated datasets. Specifically, datasets that underwent blurring, Gaussian noise addition, JPEG corruption, CLAHE, or resizing are reported in rows 1&#x2013;35; geometric datasets in rows 36&#x2013;39; bit-rescaled datasets in rows 40&#x2013;41; and screenshot datasets in rows 42&#x2013;44. The model achieved an accuracy of 92.03% on the original unaltered dataset, which thus served as the performance benchmark for other evaluations.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>The quality assessment and model performance results across the evaluated datasets.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Treatment</td><td align="left" valign="bottom">PSNR<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>, mean (SD)</td><td align="left" valign="bottom">Average SSIM<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup>, mean (SD)</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td></tr></thead><tbody><tr><td align="left" valign="top">Clean dataset</td><td align="left" valign="top">Inf</td><td align="char" char="." 
valign="top">1.0000</td><td align="char" char="." valign="top">0.9203</td><td align="char" char="." valign="top">0.915</td><td align="char" char="." valign="top">0.9119</td></tr><tr><td align="left" valign="top">Gaussian blur _0.5</td><td align="char" char="." valign="top">52.5622 (2.6778)</td><td align="char" char="." valign="top">0.9975 (0.0010)</td><td align="char" char="." valign="top">0.9031</td><td align="char" char="." valign="top">0.8896</td><td align="char" char="." valign="top">0.9017</td></tr><tr><td align="left" valign="top">Gaussian blur _1.0</td><td align="char" char="." valign="top">42.7419 (2.5447)</td><td align="char" char="." valign="top">0.9758 (0.0096)</td><td align="char" char="." valign="top">0.8422</td><td align="char" char="." valign="top">0.7836</td><td align="char" char="." valign="top">0.9085</td></tr><tr><td align="left" valign="top">Gaussian blur _1.5</td><td align="char" char="." valign="top">39.9809 (2.5143)</td><td align="char" char="." valign="top">0.9545 (0.0172)</td><td align="char" char="." valign="top">0.8078</td><td align="char" char="." valign="top">0.7299</td><td align="char" char="." valign="top">0.9254</td></tr><tr><td align="left" valign="top">Gaussian blur _2.0</td><td align="char" char="." valign="top">37.2251 (2.3945)</td><td align="char" char="." valign="top">0.9183 (0.0272)</td><td align="char" char="." valign="top">0.7625</td><td align="char" char="." valign="top">0.6819</td><td align="char" char="." valign="top">0.9085</td></tr><tr><td align="left" valign="top">Gaussian blur _2.5</td><td align="char" char="." valign="top">35.4838 (2.3231)</td><td align="char" char="." valign="top">0.8842 (0.0356)</td><td align="char" char="." valign="top">0.7250</td><td align="char" char="." valign="top">0.6530</td><td align="char" char="." valign="top">0.8610</td></tr><tr><td align="left" valign="top">Gaussian blur _3.0</td><td align="char" char="." valign="top">34.8328 (2.3077)</td><td align="char" char="." 
valign="top">0.8674 (0.0399)</td><td align="char" char="." valign="top">0.7203</td><td align="char" char="." valign="top">0.6526</td><td align="char" char="." valign="top">0.8407</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_1</td><td align="char" char="." valign="top">47.7838 (0.1157)</td><td align="char" char="." valign="top">0.9917 (0.0018)</td><td align="char" char="." valign="top">0.9109</td><td align="char" char="." valign="top">0.9190</td><td align="char" char="." valign="top">0.8847</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_2</td><td align="char" char="." valign="top">42.0190 (0.0432)</td><td align="char" char="." valign="top">0.9697 (0.0066)</td><td align="char" char="." valign="top">0.8906</td><td align="char" char="." valign="top">0.8840</td><td align="char" char="." valign="top">0.8780</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_3</td><td align="char" char="." valign="top">38.5493 (0.0308)</td><td align="char" char="." valign="top">0.9357 (0.0135)</td><td align="char" char="." valign="top">0.8688</td><td align="char" char="." valign="top">0.8436</td><td align="char" char="." valign="top">0.8780</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_4</td><td align="char" char="." valign="top">36.0689 (0.0271)</td><td align="char" char="." valign="top">0.8929 (0.0216)</td><td align="char" char="." valign="top">0.8422</td><td align="char" char="." valign="top">0.7994</td><td align="char" char="." valign="top">0.8780</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_5</td><td align="char" char="." valign="top">34.1382 (0.0267)</td><td align="char" char="." valign="top">0.8444 (0.0298)</td><td align="char" char="." valign="top">0.8172</td><td align="char" char="." valign="top">0.7602</td><td align="char" char="." valign="top">0.8814</td></tr><tr><td align="left" valign="top">Gaussian noise (grayscale)_6</td><td align="char" char="." 
valign="top">32.5600 (0.0276)</td><td align="char" char="." valign="top">0.7932 (0.0376)</td><td align="char" char="." valign="top">0.7734</td><td align="char" char="." valign="top">0.7027</td><td align="char" char="." valign="top">0.8814</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_1</td><td align="char" char="." valign="top">47.7851 (0.1134)</td><td align="char" char="." valign="top">0.9917 (0.0018)</td><td align="char" char="." valign="top">0.9156</td><td align="char" char="." valign="top">0.9288</td><td align="char" char="." valign="top">0.8847</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_2</td><td align="char" char="." valign="top">42.0211 (0.0368)</td><td align="char" char="." valign="top">0.9697 (0.0065)</td><td align="char" char="." valign="top">0.9094</td><td align="char" char="." valign="top">0.9100</td><td align="char" char="." valign="top">0.8915</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_3</td><td align="char" char="." valign="top">38.5487 (0.0225)</td><td align="char" char="." valign="top">0.9357 (0.0135)</td><td align="char" char="." valign="top">0.8969</td><td align="char" char="." valign="top">0.8962</td><td align="char" char="." valign="top">0.8780</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_4</td><td align="char" char="." valign="top">36.0686 (0.0180)</td><td align="char" char="." valign="top">0.8929 (0.0215)</td><td align="char" char="." valign="top">0.8891</td><td align="char" char="." valign="top">0.8784</td><td align="char" char="." valign="top">0.8814</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_5</td><td align="char" char="." valign="top">34.1376 (0.0173)</td><td align="char" char="." valign="top">0.8444 (0.0299)</td><td align="char" char="." valign="top">0.8547</td><td align="char" char="." valign="top">0.8279</td><td align="char" char="." 
valign="top">0.8644</td></tr><tr><td align="left" valign="top">Gaussian noise (RGB)_6</td><td align="char" char="." valign="top">32.5599 (0.0168)</td><td align="char" char="." valign="top">0.7931 (0.0376)</td><td align="char" char="." valign="top">0.8219</td><td align="char" char="." valign="top">0.7701</td><td align="char" char="." valign="top">0.8746</td></tr><tr><td align="left" valign="top">JPEG compression_10 (slightest)</td><td align="char" char="." valign="top">44.5076 (1.7566)</td><td align="char" char="." valign="top">0.9829 (0.0046)</td><td align="char" char="." valign="top">0.9141</td><td align="char" char="." valign="top">0.9255</td><td align="char" char="." valign="top">0.8847</td></tr><tr><td align="left" valign="top">JPEG compression_30</td><td align="char" char="." valign="top">40.8137 (1.6190)</td><td align="char" char="." valign="top">0.9619 (0.0091)</td><td align="char" char="." valign="top">0.8922</td><td align="char" char="." valign="top">0.9154</td><td align="char" char="." valign="top">0.8441</td></tr><tr><td align="left" valign="top">JPEG compression_50</td><td align="char" char="." valign="top">38.7102 (1.4519)</td><td align="char" char="." valign="top">0.9411 (0.0119)</td><td align="char" char="." valign="top">0.8688</td><td align="char" char="." valign="top">0.8893</td><td align="char" char="." valign="top">0.8169</td></tr><tr><td align="left" valign="top">JPEG compression_70</td><td align="char" char="." valign="top">36.7313 (1.3293)</td><td align="char" char="." valign="top">0.9125 (0.0150)</td><td align="char" char="." valign="top">0.8609</td><td align="char" char="." valign="top">0.8732</td><td align="char" char="." valign="top">0.8169</td></tr><tr><td align="left" valign="top">JPEG compression_90 (strongest)</td><td align="char" char="." valign="top">32.6132 (1.1419)</td><td align="char" char="." valign="top">0.8146 (0.0233)</td><td align="char" char="." valign="top">0.7656</td><td align="char" char="." 
valign="top">0.6845</td><td align="char" char="." valign="top">0.9119</td></tr><tr><td align="left" valign="top">CLAHE<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup>_1</td><td align="char" char="." valign="top">27.5678 (1.9698)</td><td align="char" char="." valign="top">0.932 (0.0095)</td><td align="char" char="." valign="top">0.8875</td><td align="char" char="." valign="top">0.9176</td><td align="char" char="." valign="top">0.8305</td></tr><tr><td align="left" valign="top">CLAHE_2</td><td align="char" char="." valign="top">21.3350 (1.4853)</td><td align="char" char="." valign="top">0.7836 (0.0182)</td><td align="char" char="." valign="top">0.8250</td><td align="char" char="." valign="top">0.9256</td><td align="char" char="." valign="top">0.6746</td></tr><tr><td align="left" valign="top">CLAHE_3</td><td align="char" char="." valign="top">18.7906 (1.3643)</td><td align="char" char="." valign="top">0.6860 (0.0220)</td><td align="char" char="." valign="top">0.7953</td><td align="char" char="." valign="top">0.9457</td><td align="char" char="." valign="top">0.5898</td></tr><tr><td align="left" valign="top">CLAHE_4</td><td align="char" char="." valign="top">16.6896 (1.2363)</td><td align="char" char="." valign="top">0.5904 (0.0262)</td><td align="char" char="." valign="top">0.7812</td><td align="char" char="." valign="top">0.9641</td><td align="char" char="." valign="top">0.5458</td></tr><tr><td align="left" valign="top">CLAHE_5</td><td align="char" char="." valign="top">15.6361 (1.1716)</td><td align="char" char="." valign="top">0.5395 (0.0306)</td><td align="char" char="." valign="top">0.7703</td><td align="char" char="." valign="top">0.9353</td><td align="char" char="." valign="top">0.5390</td></tr><tr><td align="left" valign="top">Resize_400</td><td align="char" char="." valign="top">27.4436 (2.8561)</td><td align="char" char="." valign="top">0.6578 (0.1013)</td><td align="char" char="." valign="top">0.7188</td><td align="char" char="." 
valign="top">0.6353</td><td align="char" char="." valign="top">0.9153</td></tr><tr><td align="left" valign="top">Resize_600</td><td align="char" char="." valign="top">29.2871 (3.0279)</td><td align="char" char="." valign="top">0.7108 (0.1042)</td><td align="char" char="." valign="top">0.8172</td><td align="char" char="." valign="top">0.7514</td><td align="char" char="." valign="top">0.9017</td></tr><tr><td align="left" valign="top">Resize_800</td><td align="char" char="." valign="top">30.8330 (3.07574)</td><td align="char" char="." valign="top">0.7651 (0.1007)</td><td align="char" char="." valign="top">0.8516</td><td align="char" char="." valign="top">0.8106</td><td align="char" char="." valign="top">0.8847</td></tr><tr><td align="left" valign="top">Resize_1000</td><td align="char" char="." valign="top">32.3258 (3.6219)</td><td align="char" char="." valign="top">0.8130 (0.0932)</td><td align="char" char="." valign="top">0.8672</td><td align="char" char="." valign="top">0.8454</td><td align="char" char="." valign="top">0.8712</td></tr><tr><td align="left" valign="top">Resize_1200</td><td align="char" char="." valign="top">33.3882 (3.8631)</td><td align="char" char="." valign="top">0.8432 (0.08724)</td><td align="char" char="." valign="top">0.8797</td><td align="char" char="." valign="top">0.8682</td><td align="char" char="." valign="top">0.8712</td></tr><tr><td align="left" valign="top">Resize_1400</td><td align="char" char="." valign="top">34.5316 (3.6730)</td><td align="char" char="." valign="top">0.8754 (0.0748)</td><td align="char" char="." valign="top">0.8938</td><td align="char" char="." valign="top">0.8874</td><td align="char" char="." valign="top">0.8814</td></tr><tr><td align="left" valign="top">Geometrics_1</td><td align="left" valign="top">N/A<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">N/A</td><td align="char" char="." valign="top">0.8635</td><td align="char" char="." 
valign="top">0.8524</td><td align="char" char="." valign="top">0.8513</td></tr><tr><td align="left" valign="top">Geometrics_2</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="char" char="." valign="top">0.8949</td><td align="char" char="." valign="top">0.9003</td><td align="char" char="." valign="top">0.8682</td></tr><tr><td align="left" valign="top">Geometrics_3</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="char" char="." valign="top">0.8827</td><td align="char" char="." valign="top">0.8765</td><td align="char" char="." valign="top">0.8678</td></tr><tr><td align="left" valign="top">Geometrics_4</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="char" char="." valign="top">0.9021</td><td align="char" char="." valign="top">0.9041</td><td align="char" char="." valign="top">0.8809</td></tr><tr><td align="left" valign="top">12-bit-rescale_1</td><td align="char" char="." valign="top">48.0188 (8.0564)</td><td align="char" char="." valign="top">0.9945 (0.0181)</td><td align="char" char="." valign="top">0.9188</td><td align="char" char="." valign="top">0.9119</td><td align="char" char="." valign="top">0.9119</td></tr><tr><td align="left" valign="top">12-bit-rescale_2</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="char" char="." valign="top">0.9141</td><td align="char" char="." valign="top">0.9027</td><td align="char" char="." valign="top">0.9119</td></tr><tr><td align="left" valign="top">Screenshot_MicroDicom_1</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="char" char="." valign="top">0.8281</td><td align="char" char="." valign="top">0.7666</td><td align="char" char="." valign="top">0.9017</td></tr><tr><td align="left" valign="top">Screenshot_MicroDicom_2</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="char" char="." 
valign="top">0.8625</td><td align="char" char="." valign="top">0.8439</td><td align="char" char="." valign="top">0.8610</td></tr><tr><td align="left" valign="top">Screenshot_ImageJ</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="char" char="." valign="top">0.7812</td><td align="char" char="." valign="top">0.7123</td><td align="char" char="." valign="top">0.8814</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>PSNR: peak signal-to-noise ratio.</p></fn><fn id="table2fn2"><p><sup>b</sup>SSIM: structural similarity index measure.</p></fn><fn id="table2fn3"><p><sup>c</sup>CLAHE: contrast-limited adaptive histogram equalization.</p></fn><fn id="table2fn4"><p><sup>d</sup>N/A: not applicable.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>The Effects of Noise and Perturbations</title><p>This section discusses model performance on datasets directly modified from the cropped scaphoid radiographs and resized datasets without geometric modifications. <xref ref-type="table" rid="table2">Table 2</xref> presents the quality assessment and model performance results across the evaluated datasets. Specifically, datasets that underwent blurring, Gaussian noise addition, JPEG corruption, CLAHE, or resizing are reported in rows 1&#x2013;35; geometric datasets in rows 36-39; bit-rescaled datasets in rows 40-41; and screenshot datasets in rows 42-44. Further details can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>For datasets that included cropped scaphoid radiographs and resized images, if the severity of image degradation was minimal, the effect on model accuracy was negligible. An important observation was that the model exhibited varying degrees of resilience against different image perturbations. Even in some datasets with similar PSNR and SSIM values and comparable image quality, some discrepancies in model performances were observed. 
These findings underscore the nuanced robustness of the deep learning model against different image distortions.</p><p>On some noisy datasets, such as Gaussian blur_0.5, Gaussian noise (RGB)_1, Gaussian noise (grayscale)_1, and JPEG compression_10, the model performances were similar to that on the clean dataset. These datasets had the least severe distortions; the pixel values changed minimally, as indicated by the high PSNR and SSIM values. However, as the perturbations increased in severity, image features deteriorated further, and model performance declined. This trend was observed across all treatments, although the extent of performance decline varied according to the type of perturbation. We conclude that if the severity of a perturbation can be maintained below a specific level in degraded images, the model can maintain good performance.</p></sec><sec id="s3-3"><title>Robustness Against Different Types of Perturbations</title><p>Next, we compared model robustness across different image perturbations. Although some datasets yielded similar average PSNR and SSIM assessments, model performances differed. This finding suggested that the model is more robust against certain types of distortions but more vulnerable to others.</p><sec id="s3-3-1"><title>Color and Grayscale Gaussian Noise</title><p>We expected that the PSNR and SSIM assessments would be similar after the introduction of Gaussian noise, regardless of whether the noise was in color or grayscale. This expectation was confirmed by the similar average PSNR and SSIM values of the corresponding RGB and grayscale Gaussian noise datasets. Nevertheless, model accuracy was considerably lower when grayscale noise was present, suggesting that the model was resilient only to color perturbations. 
This may be explained by the fact that the training set was exclusively composed of grayscale samples, although the model input layer accepted 3-channel color images.</p></sec><sec id="s3-3-2"><title>Gaussian Blurring</title><p>Gaussian blurring substantially degraded model performance, comparatively more than other noises with similar image quality metrics. For images with PSNR values between 35 and 50, linear regression analysis revealed that Gaussian blurring was associated with a 5% to 12% decrease in accuracy compared with other nongeometric transformation techniques. Although blurring an image does not significantly decrease image quality, blurring likely affects fracture features and thus negatively affects detection. This finding is consistent with the results of a previous study, which concluded that neural networks are very sensitive to blurring, probably because textures and edges are removed [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>].</p></sec><sec id="s3-3-3"><title>JPEG Compression</title><p>The effect of JPEG compression on model performance was similar to the effect of grayscale Gaussian noise. The neural network was surprisingly resilient to JPEG compression. Even after a 90% file size reduction via JPEG compression, the model achieved an accuracy of greater than 81%. This finding has important clinical implications: DICOM images, which are often larger than 20 MB, can be compressed by up to 70% via JPEG, but the model will maintain 90% accuracy in terms of detecting scaphoid fractures. 
This capability will render model implementation more computationally efficient.</p></sec><sec id="s3-3-4"><title>Resizing</title><p>The resizing perturbations were designed to simulate a realistic low-resolution workflow in which radiographs may be downsampled before region extraction and later rescaled for AI inference; therefore, this setting reflects the combined effects of downscaling information loss and upscaling or interpolation artifacts from small cropped regions of interest (ROIs), rather than a purely isolated resolution test. The downscaling treatments required the new image to store information using fewer pixels than the original, which forced the image to compress its content. As a result, a substantial amount of information was lost when the number of available storage units was reduced, leading to blurry images and lowered image quality, as demonstrated by the PSNR and SSIM values. However, fracture detection accuracy did not significantly decline until the PSNR values fell below 30. Compared with other noise treatments, resizing an image may adequately preserve the features required for fracture detection.</p></sec><sec id="s3-3-5"><title>Contrast-Limited Adaptive Histogram Equalization</title><p>CLAHE treatment enhances image contrast and thus dramatically alters the image [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. A CLAHE-enhanced image can inadvertently mislead the model, especially if the model has not been trained with CLAHE-augmented data. Model accuracy decreased as the parameter &#x201C;clip limit&#x201D; (maximum limit of the adaptive histogram equalization) increased, considerably lowering the PSNR and SSIM values. However, the poor metric values do not necessarily imply that CLAHE transformation worsens the model performance more severely than that of other perturbations. 
Even at a clip limit of 2, the PSNR declined to 21, lower than the PSNR after any Gaussian blur or JPEG treatment, and the model accuracy decreased to 83%. As the clip limit was subsequently increased, the accuracy precipitously fell to 77% (<xref ref-type="table" rid="table2">Table 2</xref>). This finding differs from the result of a previous study, in which neural networks were resilient against changes in image contrast [<xref ref-type="bibr" rid="ref17">17</xref>]. This discrepancy may be because Dodge and Karam [<xref ref-type="bibr" rid="ref5">5</xref>] investigated contrast reduction only via grayscale image superimposition and assessed correct image classification (eg, a dog and a cat) using network models. In contrast, a scaphoid DNN must detect subtle linear features when identifying fractures, and CLAHE likely obscures contrast along the fracture line. CLAHE is useful when enhancing x-ray images before human interpretation, but it should only be used in neural networks that are trained via CLAHE augmentation. Additionally, if a chosen medical imaging software includes built-in CLAHE-enhancing features, the neural network must be trained with CLAHE-augmented data.</p></sec></sec><sec id="s3-4"><title>Differences in the Reductions of Precision and Recall Rates</title><p>The prevalence of scaphoid fractures in the test dataset was 46%. As all noisy datasets were derived from this dataset, the prevalences were identical. Accordingly, a decline in model accuracy can be attributed to either reduced precision or recall rates, with the former creating more false positives and the latter creating more false negatives.</p><p>In the last step of training, the model was fine-tuned to achieve precision and recall rates of 91%. We expected that these performance metrics would decrease when interpreting noisy datasets. Intriguingly, the severities of performance decline differed for precision and recall. 
Although most noisy datasets triggered declines in both precision and recall, the performance deteriorations exhibited by the precision rates were more pronounced. However, the recall rate was usually acceptable, even in heavily altered datasets. As the model seeks to identify all possible scaphoid fractures, it remains clinically robust in terms of detecting fractures despite image perturbation, but at the cost of increased false-positive rates.</p><p>Only a few datasets showed the opposite, with a precision rate much greater than the recall rate; these were the JPEG compression_50, JPEG compression_70, and all CLAHE datasets. Conversely, the remaining JPEG compression datasets demonstrated a higher recall than precision rate, similar to other changes. Therefore, JPEG compression is likely not the principal explanation for such findings.</p><p>However, we found that CLAHE image treatment must be performed with caution. CLAHE primarily reduces recall (increasing false negatives) while precision tends to increase, indicating a heightened risk of missed fractures at higher clip limits (<xref ref-type="table" rid="table2">Table 2</xref>). If the images to be interpreted are of low quality and CLAHE enhancements are applied, machine learning scientists should be wary of inadvertently increasing the false-negative rates.</p></sec><sec id="s3-5"><title>Relationship Between Image Quality Assessments and Model Performance</title><p>We combined all noisy datasets (excluding the CLAHE datasets, given their heterogeneity) into a single dataset with 18,560 samples exhibiting various perturbations. We used this combined dataset to investigate the relationship between image quality and model performance.</p><p>We calculated 25 quantiles of SSIM value distributions and grouped the images according to quantile; this approach yielded 25 groups with 742 or 743 samples each. 
Our grouping method effectively randomized the images and eliminated the effect of any particular degradation treatment when grouping images from different datasets by image quality. We regrouped images by SSIM quantiles, rather than PSNR quantiles, because the distributions of SSIM values within each noisy dataset were wider and enabled easier stratification.</p><p>For each group, the average PSNR, average SSIM, and model accuracy were calculated (<xref ref-type="fig" rid="figure3">Figure 3</xref>). We found a strong linear relationship between average image quality and model accuracy. The model performed better on images with higher quality assessments.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>The images were divided into 25 groups according to the structural similarity index measure (SSIM) value quantiles. PSNR: peak signal-to-noise ratio.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e65596_fig03.png"/></fig><p>The average PSNR, average SSIM, and group accuracy for each were plotted; the accuracy exhibited strong linear relationships with both image quality assessments.</p><p>Both PSNR and SSIM regressions yielded high adjusted R-squared values (<italic>P</italic>&#x003C;.001). As the PSNR and SSIM values are not independent, it is reasonable to expect that the 2 assessments would yield similar results for most image degradations [<xref ref-type="bibr" rid="ref21">21</xref>]. However, the SSIM was normalized, whereas the PSNR was not. Robust linear associations between image quality evaluations and model performances were evident. Specifically, PSNR and SSIM, indicators of image quality, served as reliable independent tests that predicted AI performance under various perturbations. Higher image quality evaluations were invariably correlated with superior model performances. 
Although certain perturbations, such as geometric offsets or pixel value rescaling, did not appear to influence model performance, resolution was identified as a key factor, particularly in screenshot images. An enhanced focus on the ROI, such as zooming, effectively averted accuracy reduction. Recent research introduced the RoMIA (Robust Medical Imaging AI) framework, which focuses on developing robust AI models for chest radiographs by addressing real-world sources of image degradation such as device heterogeneity, screen-captured inputs, and compression artifacts [<xref ref-type="bibr" rid="ref22">22</xref>].</p></sec><sec id="s3-6"><title>Effects of Geometric Offsets</title><p>CNN models extract hierarchical features from local regions of an image through convolution and pooling operations. In principle, a moderate change in object location within the image should therefore not drastically confuse the model. Our results generally reflected this expectation: the 4 geometrically modified datasets retained good overall accuracy when the scaphoid remained fully within the field of view. However, accuracy declined as the strength of the affine transformations increased. A likely explanation is that parts of the scaphoid were shifted outside the image boundaries, resulting in cropped or incomplete scaphoid regions being presented to the model. Such extreme geometric distortions are unlikely to occur under real-world clinical conditions.</p><p>To further assess whether the detection model contributed to performance degradation under realistic settings, we evaluated its robustness on the 3 real-world screenshot datasets and the 12-bit-rescale_2 dataset and compared these results with those from the 4 geometric offset datasets. 
As summarized in <xref ref-type="table" rid="table3">Table 3</xref>, the mean intersection over union (IoU) values for the screenshot and 12-bit-rescale_2 datasets (0.8728, SD 0.0679 to 0.9836, SD 0.0268) were consistently higher than those of the geometric offset datasets (0.5752, SD 0.0952 to 0.8530, SD 0.0222). The geometric offset datasets (Geometrics_1&#x2010;4) were intentionally designed as synthetic baselines to simulate pure localization errors of the detector. The lower IoU values in Geometrics_1&#x2010;3 (0.5752&#x2010;0.7546) correspond to increasingly large artificial offsets, whereas Geometrics_4 (mean 0.8530, SD 0.0222) represents the least-modified geometric condition in which the scaphoid remains fully visible after offset treatment. Notably, <xref ref-type="table" rid="table3">Table 3</xref> presents that the detector achieved IoU values exceeding Geometrics_4 on all screenshot datasets, suggesting that the detection model remains stable under realistic screenshot-related degradation.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>IoU values of the datasets which the detection model was involved with and the geometric datasets.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Datasets involve detection model</td><td align="left" valign="bottom">IoU<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>, mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top">Screenshot_MicroDicom_1</td><td align="left" valign="top">0.8768 (0.0625)</td></tr><tr><td align="left" valign="top">Screenshot_MicroDicom_2</td><td align="left" valign="top">0.8728 (0.0679)</td></tr><tr><td align="left" valign="top">Screenshot_ImageJ</td><td align="left" valign="top">0.8907 (0.0554)</td></tr><tr><td align="left" valign="top">12-bit-rescale_2</td><td align="left" valign="top">0.9836 (0.0268)</td></tr><tr><td align="left" valign="top">Geometric datasets<sup><xref 
ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Geometrics_1</td><td align="left" valign="top">0.5752 (0.0952)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Geometrics_2</td><td align="left" valign="top">0.7546 (0.0662)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Geometrics_3</td><td align="left" valign="top">0.7063 (0.0529)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Geometrics_4</td><td align="left" valign="top">0.8530 (0.0222)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>IoU: intersection over union.</p></fn><fn id="table3fn2"><p><sup>b</sup>Considering geometric datasets IoU as baseline, the detection model performed better on all the screenshot datasets, showing its robustness toward perturbations, and that the performance of the detection model would not be a major factor leading to failure of the classification model when image degradation occurs.</p></fn></table-wrap-foot></table-wrap><p>As the classification model still maintained reasonable accuracy across the geometric baselines, IoU values comparable to or higher than Geometrics_4 indicate that under real-world screenshot- and rescaling-related perturbations, the detector&#x2019;s localization accuracy is at least similar to its best geometric baseline performance and therefore is less likely to be a dominant contributor to the observed classification performance changes. 
Overall, these findings suggest that detector-related bounding box offsets likely play a limited role in the performance degradation observed in these realistic conditions.</p></sec><sec id="s3-7"><title>Effects of Pixel Value Rescaling Methods</title><p>The 2 methods for rescaling images from 12-bit to 8-bit did not materially affect model performance. The 2 datasets derived from the alternative rescaling method, 12-bit-rescale_1 and 12-bit-rescale_2, showed only a minor decrease in accuracy (&#x003C;1%), although the rescaled images were nearly identical. Examination of the few inconsistent cases revealed that the classification confidence scores for &#x201C;fracture&#x201D; and &#x201C;nonfracture&#x201D; were much closer in these instances, with differences typically on the order of 10<sup>2</sup>. In contrast, in most other cases, the confidence score gap exceeded 10<sup>3</sup>, indicating stronger and more stable model decisions. These relatively uncertain confidence scores suggest that the inconsistent cases were inherently difficult for the model to judge, even without perturbation. As the model&#x2019;s decisions for such cases were already unstable, slight modifications introduced by the alternative rescaling algorithms&#x2014;despite causing only minimal pixel value changes&#x2014;could flip the predicted label. This likely explains the minor performance differences observed among the clean dataset and the 2 rescaled datasets (12-bit-rescale_1 and 12-bit-rescale_2).</p></sec><sec id="s3-8"><title>Screenshot Datasets</title><p>Taking screenshots was the most complex perturbation of all the studied treatments. Resolution may be the factor that most strongly affects model performance. The only difference between the datasets Screenshot_MicroDicom_1 and Screenshot_MicroDicom_2 was the resolution; changing the resolution from 900&#x00D7;1050 to 550&#x00D7;780 pixels resulted in an accuracy reduction from 86.3% to 82.8%. 
The dataset Screenshot_ImageJ, in which images collected using ImageJ were resized to a width of 600 pixels while keeping the aspect ratio, exhibited the worst resolution and worst read accuracy (78.1%). Such declines in accuracy, precision rate, and recall rate were similar to declines observed in the corresponding resized datasets (Screenshot_MicroDicom_1 with Resize_600, Screenshot_MicroDicom_2 with Resize_1000, and Screenshot_ImageJ with Resize_600). Although the detection model had been involved in the preparation of these datasets, <xref ref-type="table" rid="table3">Table 3</xref> implies that in these cases, scaphoids were correctly detected and cropped. In conjunction with the fact that the classification performance of screenshot datasets was worse than that of any of the geometric datasets, the degradation in model performance indeed stems from resizing effects. We also observed that although resolution deterioration was the major factor, the screenshot process did not impact model performance identically to pure resizing. Specifically, Screenshot_ImageJ showed a modest but consistent performance drop compared with its corresponding resized dataset (Resize_600) and also performed worse than another screenshot dataset (Screenshot_MicroDicom_1) collected at a similar resolution. This suggests that viewer-specific display or resampling algorithms during screenshot capture may also influence model performance.</p><p>On the basis of these results, one strategy that may prevent accuracy reduction after taking screenshots would be to zoom into the image and then take a screenshot at the 100% level. 
Although the radiographic image subsequently may not fit within the screen display, it is unnecessary to capture the entire image because the only ROI is the scaphoid.</p></sec><sec id="s3-9"><title>Computational Cost and Inference Time</title><p>We evaluated the average inference time by running through the 640 whole-hand x-ray images from the clean testing dataset using the complete preprocessing, bounding box detection, and classification inference pipeline. We obtained a computation time of approximately 6 to 8 ms per image on an NVIDIA RTX 3080 (10 GB RAM), while warmup runs were not included. The end-to-end pipeline used less than 200 MB GPU memory per 240&#x00D7;240&#x00D7;3 input (including framework overhead), enabling near real-time inference on a single RTX 3080. Consequently, our approach is compatible with near-real-time applications, assuming a well-optimized implementation. Although resizing or blurring images may incur a minor preprocessing overhead, these operations add only approximately 1 ms to the total inference time, indicating that real-time usage in clinical workflow is feasible on a modern GPU.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>Taken together, our findings highlight that the deep learning model&#x2019;s performance is inversely related to the severity of image degradation, with Gaussian blur, grayscale noise, and CLAHE standing out as the most disruptive factors. These types of distortions can destroy or obscure the crucial edge information that the network relies upon for detecting subtle fracture lines. In contrast, moderate JPEG compression, resizing, or color noise had a less pronounced effect. 
Notably, the resizing experiments in this study reflect a realistic low-resolution workflow involving downscaling, then cropping, and then upscaling, which is consistent with real-world recaptured or low-resolution imaging scenarios described in the literature [<xref ref-type="bibr" rid="ref9">9</xref>]. While accuracy decreases were comparable between Screenshot_ImageJ and the corresponding Resize datasets, Screenshot_ImageJ showed a modest additional reduction in recall, suggesting that screenshot recapture can affect sensitivity beyond pure resizing.</p><p>From a clinical perspective, these results underscore the importance of maintaining sufficient image fidelity and avoiding overly aggressive postprocessing steps. If CLAHE-based enhancement or heavy contrast adjustments are used, training the model with matching augmentations may be necessary to preserve performance. Similarly, our analysis indicates that modest compression does not necessarily compromise diagnostic accuracy, suggesting an avenue for reducing file sizes without compromising model output.</p><p>Although the network proved fairly robust across various perturbations, investigators should remain cautious when applying transformations, such as extreme blurring, under or overexposure, or very low-resolution screenshots. In these scenarios, crucial details may be irretrievably lost, leading to significantly higher misclassification rates. Our results also emphasize that the precision rate is especially sensitive to noise, often dropping faster than recall. This behavior means that some perturbations can inflate false positives, which may still be acceptable in screening contexts that favor high recall but could impact workflows that depend on precise diagnoses.</p><p>In certain clinical scenarios, obtaining the original radiographic file may not be immediately feasible&#x2014;such as in emergency settings, remote consultations, or when imaging systems have limited data access privileges. 
In such situations&#x2014;particularly in emergency department workflows where the original radiographic DICOM file may not be readily accessible because of time pressure, remote consultation needs, or limited system privileges&#x2014;clinicians or technicians may rely on on-screen screenshots or smartphone photographs for rapid review, sharing, or AI-assisted interpretation. Although practical, these recaptured images can introduce unintended degradation due to variations in capture resolution, scaling distortions, and secondary compression, which may obscure subtle fracture cues. This real-world practice motivated our inclusion of screenshot-based perturbations as a clinically relevant proxy for urgent or resource-limited conditions. To replicate such real-world conditions, our study incorporated a &#x201C;screenshot&#x201D; perturbation by capturing radiographs at different image sizes and resolutions. The resulting artifacts mimic the geometric and pixel-level distortions that may arise in urgent or resource-limited workflows, providing insight into how these factors influence model performance.</p><p>Although this study was conducted using data from a single institution, the dataset represents one of the largest and most diverse radiographic collections in Taiwan, drawn from multiple campuses within the Chang Gung Memorial Hospital system, with different scanners and protocols, introducing natural variability while maintaining consistent image quality and annotation standards. The main objective of this study was to test a methodological approach for evaluating model robustness to image degradation, rather than to claim broad generalization to all clinical environments. 
The single-institution design, therefore, provided a stable yet sufficiently varied dataset for controlled experimentation.</p></sec><sec id="s4-2"><title>Comparison With Prior Work</title><p>Our findings are consistent with prior studies showing that CNNs are sensitive to image degradation and domain shifts in medical imaging. Recent research introduced the RoMIA framework, which aims to develop robust AI models for chest radiographs by addressing real-world sources of image degradation, such as device heterogeneity, screen-captured inputs, and compression artifacts. In parallel, previous studies have shown that AI can enhance image quality and improve diagnostic reliability across multiple imaging modalities, including computed tomography and radiography [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. Moreover, real-world deployments have demonstrated that recaptured or smartphone-captured radiographs introduce compounded degradations (eg, scaling, compression, and display artifacts) that can alter AI outputs, underscoring the clinical relevance of evaluating robustness under such workflows [<xref ref-type="bibr" rid="ref9">9</xref>]. Taken together, these studies highlight the growing emphasis on image fidelity and consistent acquisition parameters as key factors for ensuring robust and generalizable AI performance. Previous research [<xref ref-type="bibr" rid="ref18">18</xref>] has demonstrated that blurring and contrast alteration primarily affect the high-frequency components essential for delineating structural boundaries&#x2014;similar to the degradation patterns observed in this study. The alignment between our results and those of earlier works reinforces that these degradation effects are likely intrinsic to CNN-based architectures rather than model-specific artifacts.</p></sec><sec id="s4-3"><title>Limitations</title><p>This study has several limitations. 
First, the robustness results reported here are relative to our standard clinical preprocessing pipeline. Specifically, the &#x201C;clean&#x201D; baseline was generated by detector-based cropping followed by resizing each scaphoid ROI to 240&#x00D7;240 pixels to match the fixed input size of EfficientNetB1. As this crop-and-resize step is itself a lossy operation, our robustness conclusions should be interpreted within this trained-and-deployed pipeline context, rather than as absolute performance relative to the raw, unprocessed DICOM images. Second, our analysis was confined to a single deep learning architecture, EfficientNetB1. Different backbones (eg, ResNet and DenseNet) may exhibit varying levels of robustness to specific noise types, such as Gaussian blur or CLAHE. Nevertheless, we expect that many of the general trends&#x2014;such as vulnerability to edge-destroying blur&#x2014;would hold true across CNN-based architectures. Third, all experiments were performed on retrospective datasets from a single institution and focused on controlled pixel-level perturbations. As a result, the data may not fully capture the diversity of real-world clinical acquisition conditions or additional artifacts, such as motion blur, partial occlusions, and multiview variability. Future prospective, multi-institutional studies incorporating these realistic factors would provide a more comprehensive assessment of model resilience and generalizability.</p></sec><sec id="s4-4"><title>Future Directions</title><p>Future work could extend our methodology by evaluating multiple architectures under identical perturbations to quantify differences in susceptibility and to determine whether certain network designs are inherently more robust. Incorporating multiview data and realistic clinical artifacts (eg, motion blur and underexposure) may also help develop more generalizable models. 
Additionally, advanced data augmentation strategies that simulate image degradation during training could enhance robustness. Exploring hybrid or ensemble models that integrate texture- and shape-based cues, as well as integrating domain-specific priors, could further mitigate the impact of low-quality inputs.</p></sec><sec id="s4-5"><title>Conclusions</title><p>Neural network models designed to complement radiographic interpretation in clinical practice will inevitably encounter image quality distortions due to variations in acquisition, processing, and storage. In this study, we systematically evaluated the effects of image degradation on the performance of a DNN for scaphoid fracture classification. We found a strong negative correlation between image quality and model accuracy, with Gaussian blur, grayscale Gaussian noise, and CLAHE exerting the greatest influence on performance.</p><p>Performance decline was primarily driven by decreases in precision, whereas recall remained relatively stable. 
When developing neural networks for fracture detection in clinical radiography, training with targeted perturbations&#x2014;particularly Gaussian blur, grayscale noise, and CLAHE&#x2014;may improve the model robustness and ensure more reliable performance in diverse clinical environments.</p></sec></sec></body><back><ack><p>No generative artificial intelligence was used in the preparation of this manuscript.</p></ack><notes><sec><title>Funding</title><p>This work was supported by a grant from the Center for Artificial Intelligence in Medicine (grant CORPG3L0181, CORPG3M0231, CORPG3K0201, and CLRPG3H0015) at Chang Gung Memorial Hospital and National Science and Technology Council (112-2410-H-182A-001).</p></sec><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are not publicly available due to institutional review board regulations and patient privacy considerations but are available from the corresponding author upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: CFK, KCC, CL</p><p>Data curation: CFK, CWW</p><p>Formal analysis: CL, TC</p><p>Methodology: CL, APY</p><p>Project administration: CFK, KCC</p><p>Resources: CFK, CWW</p><p>Software: CL, TC</p><p>Supervision: CFK, KCC</p><p>Validation: CL, TC</p><p>Writing &#x2013; original draft: CL, APY, TC</p><p>Writing &#x2013; review &#x0026; editing: APY, KCC, CWW</p><p>Co-first authors: CL, APY (corresponding author: CFK, KCC)</p></fn><fn fn-type="conflict"><p>KC receives funding from the National Institutes of Health and book royalties from Wolters Kluwer and Elsevier. 
All other authors declare that they have no conflicts of interest related to this work.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">CLAHE</term><def><p>contrast-limited adaptive histogram equalization</p></def></def-item><def-item><term id="abb3">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb4">DNN</term><def><p>deep neural network</p></def></def-item><def-item><term id="abb5">IoU</term><def><p>intersection over union</p></def></def-item><def-item><term id="abb6">PSNR</term><def><p>peak signal-to-noise ratio</p></def></def-item><def-item><term id="abb7">RGB</term><def><p>red, green, and blue</p></def></def-item><def-item><term id="abb8">ROI</term><def><p>region of interest</p></def></def-item><def-item><term id="abb9">SSIM</term><def><p>structural similarity index measure</p></def></def-item><def-item><term id="abb10">VOI</term><def><p>value of interest</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>V</given-names> </name><name name-style="western"><surname>Erdal</surname><given-names>B</given-names> </name><name name-style="western"><surname>Ramirez</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Current state of community-driven radiological AI deployment in medical imaging</article-title><source>JMIR AI</source><year>2024</year><month>12</month><day>9</day><volume>3</volume><fpage>e55833</fpage><pub-id pub-id-type="doi">10.2196/55833</pub-id><pub-id pub-id-type="medline">39653370</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name 
name-style="western"><surname>Hendrycks</surname><given-names>D</given-names> </name><name name-style="western"><surname>Dietterich</surname><given-names>T</given-names> </name></person-group><article-title>Benchmarking neural network robustness to common corruptions and perturbations</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 28, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1903.12261</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Habibi Aghdam</surname><given-names>H</given-names> </name><name name-style="western"><surname>Jahani Heravi</surname><given-names>E</given-names> </name><name name-style="western"><surname>Puig</surname><given-names>D</given-names> </name></person-group><article-title>Analyzing the stability of convolutional neural networks against image degradation</article-title><conf-name>International Conference on Computer Vision Theory and Applications</conf-name><conf-date>Feb 27-29, 2016</conf-date><conf-loc>Rome, Italy</conf-loc><fpage>370</fpage><lpage>382</lpage><pub-id pub-id-type="doi">10.5220/0005720703700382</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Hosseini</surname><given-names>H</given-names> </name><name name-style="western"><surname>Xiao</surname><given-names>B</given-names> </name><name name-style="western"><surname>Poovendran</surname><given-names>R</given-names> </name></person-group><article-title>Google&#x2019;s cloud vision API is not robust to noise</article-title><year>2017</year><conf-name>2017 16th IEEE International Conference on Machine Learning and Applications (ICMLA)</conf-name><conf-date>Dec 18-21, 2017</conf-date><pub-id pub-id-type="doi">10.1109/ICMLA.2017.0-172</pub-id></nlm-citation></ref><ref 
id="ref5"><label>5</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Dodge</surname><given-names>S</given-names> </name><name name-style="western"><surname>Karam</surname><given-names>L</given-names> </name></person-group><article-title>A study and comparison of human and deep learning recognition performance under visual distortions</article-title><year>2017</year><conf-name>2017 26th International Conference on Computer Communication and Networks (ICCCN)</conf-name><conf-date>Jul 31 to Aug 3, 2017</conf-date><conf-loc>Vancouver, BC, Canada</conf-loc><fpage>1</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1109/ICCCN.2017.8038465</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Fawzi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Moosavi-Dezfooli</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Frossard</surname><given-names>P</given-names> </name></person-group><article-title>Robustness of classifiers: from adversarial to random noise</article-title><conf-name>NIPS&#x2019;16: Proceedings of the 30th International Conference on Neural Information Processing Systems</conf-name><conf-date>Dec 5-10, 2016</conf-date><conf-loc>Barcelona, Spain</conf-loc><fpage>1632</fpage><lpage>1640</lpage><pub-id pub-id-type="doi">10.5555/3157096.3157279</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Effectiveness of AI for enhancing 
computed tomography image quality and radiation protection in radiology: systematic review and meta-analysis</article-title><source>J Med Internet Res</source><year>2025</year><month>02</month><day>27</day><volume>27</volume><fpage>e66622</fpage><pub-id pub-id-type="doi">10.2196/66622</pub-id><pub-id pub-id-type="medline">40053787</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Javed</surname><given-names>H</given-names> </name><name name-style="western"><surname>El-Sappagh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Abuhmed</surname><given-names>T</given-names> </name></person-group><article-title>Robustness in deep learning models for medical diagnostics: security and adversarial challenges towards robust AI applications</article-title><source>Artif Intell Rev</source><year>2025</year><volume>58</volume><issue>1</issue><fpage>12</fpage><pub-id pub-id-type="doi">10.1007/s10462-024-11005-9</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kuo</surname><given-names>PC</given-names> </name><name name-style="western"><surname>Tsai</surname><given-names>CC</given-names> </name><name name-style="western"><surname>L&#x00F3;pez</surname><given-names>DM</given-names> </name><etal/></person-group><article-title>Recalibration of deep learning models for abnormality detection in smartphone-captured chest radiograph</article-title><source>NPJ Digit Med</source><year>2021</year><month>02</month><day>15</day><volume>4</volume><issue>1</issue><fpage>25</fpage><pub-id pub-id-type="doi">10.1038/s41746-021-00393-9</pub-id><pub-id pub-id-type="medline">33589700</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Yoon</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>YL</given-names> </name><name name-style="western"><surname>Kane</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>CF</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chung</surname><given-names>KC</given-names> </name></person-group><article-title>Development and validation of a deep learning model using convolutional neural networks to identify scaphoid fractures in radiographs</article-title><source>JAMA Netw Open</source><year>2021</year><month>05</month><day>3</day><volume>4</volume><issue>5</issue><fpage>e216096</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.6096</pub-id><pub-id pub-id-type="medline">33956133</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hena</surname><given-names>B</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Castanedo</surname><given-names>CI</given-names> </name><name name-style="western"><surname>Maldague</surname><given-names>X</given-names> </name></person-group><article-title>Deep learning neural network performance on NDT digital x-ray radiography images: analyzing the impact of image quality parameters&#x2014;an experimental study</article-title><source>Sensors (Basel)</source><year>2023</year><month>04</month><day>27</day><volume>23</volume><issue>9</issue><fpage>4324</fpage><pub-id pub-id-type="doi">10.3390/s23094324</pub-id><pub-id pub-id-type="medline">37177528</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation 
citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Nazar&#x00E9;</surname><given-names>TS</given-names> </name><name name-style="western"><surname>da Costa</surname><given-names>GBP</given-names> </name><name name-style="western"><surname>Contato</surname><given-names>WA</given-names> </name><name name-style="western"><surname>Ponti</surname><given-names>M</given-names> </name></person-group><article-title>Deep convolutional neural networks and noisy images</article-title><year>2018</year><conf-name>Progress in Pattern Recognition, Image Analysis, Computer Vision, and Applications</conf-name><conf-date>Nov 7-10, 2017</conf-date><conf-loc>Valpara&#x00ED;so, Chile</conf-loc><publisher-name>Springer International Publishing</publisher-name><pub-id pub-id-type="doi">10.1007/978-3-319-75193-1_50</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Le</surname><given-names>Q</given-names> </name></person-group><article-title>EfficientNet: rethinking model scaling for convolutional neural networks</article-title><year>2019</year><conf-name>Proceedings of the 36th International Conference on Machine Learning</conf-name><conf-date>Jun 9-15, 2019</conf-date><conf-loc>Long Beach, California, USA</conf-loc><fpage>6105</fpage><lpage>6114</lpage><pub-id pub-id-type="doi">10.48550/arXiv.1905.11946</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Bovik</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Sheikh</surname><given-names>HR</given-names> </name><name
name-style="western"><surname>Simoncelli</surname><given-names>EP</given-names> </name></person-group><article-title>Image quality assessment: from error visibility to structural similarity</article-title><source>IEEE Trans Image Process</source><year>2004</year><month>04</month><volume>13</volume><issue>4</issue><fpage>600</fpage><lpage>612</lpage><pub-id pub-id-type="doi">10.1109/tip.2003.819861</pub-id><pub-id pub-id-type="medline">15376593</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name></person-group><article-title>The SSIM index for image quality assessment</article-title><source>Laboratory for Computational Vision, New York University</source><access-date>2025-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="http://www.cns.nyu.edu/~lcv/ssim">http://www.cns.nyu.edu/~lcv/ssim</ext-link></comment></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Vernon</surname><given-names>D</given-names> </name></person-group><source>Machine Vision: Automated Visual Inspection and Robot Vision</source><year>1991</year><access-date>2025-12-23</access-date><publisher-name>Prentice-Hall</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://archive.org/details/machinevisionaut0000vern">https://archive.org/details/machinevisionaut0000vern</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Dodge</surname><given-names>S</given-names> </name><name name-style="western"><surname>Karam</surname><given-names>L</given-names> </name></person-group><article-title>Understanding how image quality affects deep neural 
networks</article-title><conf-name>2016 Eighth International Conference on Quality of Multimedia Experience (QoMEX)</conf-name><conf-date>Jun 6-8, 2016</conf-date><conf-loc>Lisbon, Portugal</conf-loc><fpage>1</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1109/QoMEX.2016.7498955</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Gonzalez</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Woods</surname><given-names>RE</given-names> </name></person-group><source>Digital Image Processing</source><year>1992</year><publisher-name>Addison-Wesley Publishing Company</publisher-name><pub-id pub-id-type="other">9780201508031</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Zuiderveld</surname><given-names>K</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Heckbert</surname><given-names>PS</given-names> </name></person-group><article-title>Contrast limited adaptive histogram equalization</article-title><source>Graphics Gems IV</source><year>1994</year><publisher-name>Academic Press Professional Inc</publisher-name><fpage>474</fpage><lpage>485</lpage><pub-id pub-id-type="doi">10.1016/B978-0-12-336156-1.50061-6</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pizer</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Amburn</surname><given-names>EP</given-names> </name><name name-style="western"><surname>Austin</surname><given-names>JD</given-names> </name><etal/></person-group><article-title>Adaptive histogram equalization and its variations</article-title><source>Comput Vis Graph 
Image Process</source><year>1987</year><month>09</month><volume>39</volume><issue>3</issue><fpage>355</fpage><lpage>368</lpage><pub-id pub-id-type="doi">10.1016/S0734-189X(87)80186-X</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Hor&#x00E9;</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ziou</surname><given-names>D</given-names> </name></person-group><article-title>Image quality metrics: PSNR vs. SSIM</article-title><conf-name>2010 20th International Conference on Pattern Recognition</conf-name><conf-date>Aug 23-26, 2010</conf-date><conf-loc>Istanbul, Turkey</conf-loc><fpage>2366</fpage><lpage>2369</lpage><pub-id pub-id-type="doi">10.1109/ICPR.2010.579</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anand</surname><given-names>A</given-names> </name><name name-style="western"><surname>Krithivasan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Roy</surname><given-names>K</given-names> </name></person-group><article-title>RoMIA: A framework for creating robust medical imaging AI models for chest radiographs</article-title><source>Front Radiol</source><year>2023</year><volume>3</volume><fpage>1274273</fpage><pub-id pub-id-type="doi">10.3389/fradi.2023.1274273</pub-id><pub-id pub-id-type="medline">38260820</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Elangovan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gutierrez</surname><given-names>L</given-names> 
</name><etal/></person-group><article-title>Democratizing artificial intelligence imaging analysis with automated machine learning: tutorial</article-title><source>J Med Internet Res</source><year>2023</year><month>10</month><day>12</day><volume>25</volume><fpage>e49949</fpage><pub-id pub-id-type="doi">10.2196/49949</pub-id><pub-id pub-id-type="medline">37824185</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Representative examples of all image perturbation types evaluated in this study.</p><media xlink:href="medinform_v14i1e65596_app1.docx" xlink:title="DOCX File, 16385 KB"/></supplementary-material></app-group></back></article>